@archimonde12/llm-proxy 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +20 -0
- package/README.md +272 -0
- package/dist/adapters/base.js +2 -0
- package/dist/adapters/deepseek.js +78 -0
- package/dist/adapters/index.js +20 -0
- package/dist/adapters/ollama.js +182 -0
- package/dist/adapters/openaiCompatible.js +50 -0
- package/dist/admin/auth.js +37 -0
- package/dist/admin/configStore.js +80 -0
- package/dist/admin/envStore.js +149 -0
- package/dist/admin/routes.js +360 -0
- package/dist/cli/bin.js +10 -0
- package/dist/cli/commands/config.js +31 -0
- package/dist/cli/commands/doctor.js +107 -0
- package/dist/cli/commands/init.js +68 -0
- package/dist/cli/commands/start.js +38 -0
- package/dist/cli/commands/status.js +23 -0
- package/dist/cli/index.js +22 -0
- package/dist/config/defaultModelsFile.js +16 -0
- package/dist/config/load.js +221 -0
- package/dist/config/mergeHeaders.js +33 -0
- package/dist/config/paths.js +45 -0
- package/dist/config/schema.js +59 -0
- package/dist/config.js +25 -0
- package/dist/http.js +69 -0
- package/dist/index.js +30 -0
- package/dist/observability/metrics.js +102 -0
- package/dist/observability/modelMessageDebugStore.js +69 -0
- package/dist/observability/modelRequestStore.js +52 -0
- package/dist/observability/requestId.js +21 -0
- package/dist/observability/requestRecorder.js +48 -0
- package/dist/observability/summary.js +56 -0
- package/dist/observability/tokenUsage.js +46 -0
- package/dist/server.js +442 -0
- package/dist/startupLog.js +114 -0
- package/dist/types.js +2 -0
- package/dist/upstreamProbe.js +53 -0
- package/dist/version.js +19 -0
- package/package.json +73 -0
- package/ui/dist/assets/index-CDUAKry5.css +1 -0
- package/ui/dist/assets/index-Dq3YzAqp.js +13 -0
- package/ui/dist/index.html +16 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
|
|
10
|
+
|
|
11
|
+
The above copyright notice and this permission notice shall be included in all
|
|
12
|
+
copies or substantial portions of the Software.
|
|
13
|
+
|
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
15
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
16
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
17
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
18
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
19
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
20
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,272 @@
|
|
|
1
|
+
# llm-proxy
|
|
2
|
+
|
|
3
|
+
**llm-proxy** is a lightweight, high-performance gateway designed to unify multiple LLM backends (Ollama, vLLM, OpenAI-compatible servers, etc.) into a single, standardized OpenAI-compatible API endpoint.
|
|
4
|
+
|
|
5
|
+
[](https://opensource.org/licenses/MIT)
|
|
6
|
+
[](https://nodejs.org/)
|
|
7
|
+
[](https://pnpm.io/)
|
|
8
|
+
[](https://www.typescriptlang.org/)
|
|
9
|
+
[](https://www.fastify.io/)
|
|
10
|
+
[](#)
|
|
11
|
+
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
## Key features
|
|
15
|
+
|
|
16
|
+
* **Unified interface**: Access different backends (Ollama, vLLM, etc.) using the standard OpenAI SDK and payload format.
|
|
17
|
+
* **Multi-backend support**: Seamlessly route requests to various providers via a simple `models.json` configuration.
|
|
18
|
+
* **Dynamic configuration**: Update your model list and backend URLs on-the-fly via the **Admin API** without restarting the server.
|
|
19
|
+
* **Built-in observability**:
|
|
20
|
+
* **Prometheus metrics**: Monitor request counts, latency, and token usage.
|
|
21
|
+
* **Request history**: Track recent requests via an in-memory buffer.
|
|
22
|
+
* **Web UI**: A built-in dashboard to manage configuration, environment variables, and monitoring (when `ui/dist` is present).
|
|
23
|
+
* **Streaming**: Full support for Server-Sent Events (SSE) for real-time chat completions.
|
|
24
|
+
* **Lightweight and fast**: Built with **Fastify** and **TypeScript** for minimal overhead and maximum throughput.
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## Quick start
|
|
29
|
+
|
|
30
|
+
### Prerequisites
|
|
31
|
+
|
|
32
|
+
* [Node.js](https://nodejs.org/) (**v20+ required**)
|
|
33
|
+
* [pnpm](https://pnpm.io/)
|
|
34
|
+
|
|
35
|
+
### Installation
|
|
36
|
+
|
|
37
|
+
**From npm (global CLI)** — no clone required; the published package includes `dist/` and `ui/dist` (dashboard at `/ui` when you run the server).
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
npm install -g @archimonde12/llm-proxy
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
Requires **Node.js 20+** (see `engines` in `package.json`). After installing, run `llm-proxy --help` or `llm-proxy start`.
|
|
44
|
+
|
|
45
|
+
**Without global install (npx):**
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
npx @archimonde12/llm-proxy --help
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Configuration file resolution is described under [Configuration](#configuration) (important when using a global install from arbitrary working directories).
|
|
52
|
+
|
|
53
|
+
**From source** (development):
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
# Clone the repository (replace OWNER with your fork or upstream)
|
|
57
|
+
git clone https://github.com/OWNER/llm-proxy.git
|
|
58
|
+
cd llm-proxy
|
|
59
|
+
|
|
60
|
+
# Install dependencies
|
|
61
|
+
pnpm install
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
### Running the server
|
|
65
|
+
|
|
66
|
+
**Development** (TypeScript; no production UI bundle required for API-only work):
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
pnpm dev
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
In **development** (`pnpm dev`), the default bind is **0.0.0.0:8787** (override with `HOST` and `PORT`). In **`llm-proxy start`** / **`pnpm start`**, the default bind is **127.0.0.1:8787** unless you set `HOST`, `PORT`, or pass `--host` / `--port`.
|
|
73
|
+
|
|
74
|
+
**Production** (compiled server and built web UI):
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
npm run build # or: pnpm build — Vite UI bundle + TypeScript; produces ui/dist and dist/
|
|
78
|
+
pnpm start # or: npm start — runs node dist/index.js
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
A full build always runs the UI step first, so `ui/dist` is present afterward. Maintainers can use **npm** or **pnpm** for `build` / `start`; the build script does not invoke `pnpm` internally.
|
|
82
|
+
|
|
83
|
+
### CLI (`llm-proxy`)
|
|
84
|
+
|
|
85
|
+
With **`npm install -g @archimonde12/llm-proxy`**, use the `llm-proxy` command on your `PATH`.
|
|
86
|
+
|
|
87
|
+
From a **source** tree after `npm run build` / `pnpm build`:
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
pnpm cli -- <subcommand> [options]
|
|
91
|
+
# equivalent:
|
|
92
|
+
node dist/cli/bin.js <subcommand> [options]
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
| Command | Purpose |
|
|
96
|
+
| :--- | :--- |
|
|
97
|
+
| `init` | Create a starter `models.json` (wizard when interactive; use `-y` for defaults). Options: `--file <path>`, `-y` / `--yes`. |
|
|
98
|
+
| `start` | Start the HTTP server. Options: `--models <path>`, `--port <port>` (default **8787**), `--host <host>` (default **127.0.0.1**). |
|
|
99
|
+
| `status` | Call public **`GET /healthz`**. Options: `--url <baseUrl>` (default `http://127.0.0.1:8787`). |
|
|
100
|
+
| `doctor` | Validate config, check listen port, optionally ping upstreams. Options: `--models <path>`, `--host`, `--port`, `--deep`. |
|
|
101
|
+
| `config validate` | Validate `models.json` against the schema. Options: `--file <path>` (default `./models.json`). |
|
|
102
|
+
|
|
103
|
+
### Environment variables (server / CLI)
|
|
104
|
+
|
|
105
|
+
| Variable | Used by | Description |
|
|
106
|
+
| :--- | :--- | :--- |
|
|
107
|
+
| `MODELS_PATH` | `pnpm dev`, `start`, `doctor` | Path to `models.json`. For `start` and `doctor`, only used when `--models` is omitted; `pnpm dev` always resolves the file from env / defaults (no `--models` flag). |
|
|
108
|
+
| `PORT` | `pnpm dev`, `start` | Listen port if unset (default **8787**); `start` also accepts `--port`. |
|
|
109
|
+
| `HOST` | `pnpm dev`, `start` | Bind address if unset — **`pnpm dev`** defaults to **0.0.0.0**, **`start`** defaults to **127.0.0.1**; `start` also accepts `--host`. |
|
|
110
|
+
| `LOG_LEVEL` | Server | Optional: `debug`, `info`, `warn`, or `error`; omit to log at all levels. |
|
|
111
|
+
|
|
112
|
+
---
|
|
113
|
+
|
|
114
|
+
## Configuration
|
|
115
|
+
|
|
116
|
+
The proxy uses a `models.json` file to map your custom model IDs to specific backends.
|
|
117
|
+
|
|
118
|
+
**Resolution order** (see [`src/config/load.ts`](src/config/load.ts)):
|
|
119
|
+
|
|
120
|
+
1. **`llm-proxy start --models <path>`** — explicit file path.
|
|
121
|
+
2. **`MODELS_PATH`** — if set, that path is used (a starter file is created if missing).
|
|
122
|
+
3. **Otherwise:** **`./models.json`** relative to the **current working directory** — if it exists, it is used.
|
|
123
|
+
4. **Otherwise:** **`~/.config/llm-proxy/models.json`** — canonical user config; if it exists, it is used (good default for a **global** install when you are not in a project directory).
|
|
124
|
+
5. **Otherwise:** **`~/.config/llm-open-gateway/models.json`** — legacy path for backward compatibility with older installs; used only if the canonical path above does not exist.
|
|
125
|
+
6. **Otherwise:** a starter `models.json` is created at **`./models.json`** in the current working directory.
|
|
126
|
+
|
|
127
|
+
This means a global install does not require a checkout: use a file in the cwd, set `MODELS_PATH`, pass `--models`, or keep your config under `~/.config/llm-proxy/models.json`.
|
|
128
|
+
|
|
129
|
+
### Example `models.json`
|
|
130
|
+
|
|
131
|
+
```json
|
|
132
|
+
{
|
|
133
|
+
"models": [
|
|
134
|
+
{
|
|
135
|
+
"id": "ollama-llama3",
|
|
136
|
+
"adapter": "ollama",
|
|
137
|
+
"baseUrl": "http://localhost:11434",
|
|
138
|
+
"model": "llama3"
|
|
139
|
+
},
|
|
140
|
+
{
|
|
141
|
+
"id": "vllm-mixtral",
|
|
142
|
+
"adapter": "openai_compatible",
|
|
143
|
+
"baseUrl": "http://localhost:8000",
|
|
144
|
+
"model": "mixtral-8x7b",
|
|
145
|
+
"apiKey": "your-secret-api-key"
|
|
146
|
+
}
|
|
147
|
+
]
|
|
148
|
+
}
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
### Supported adapters
|
|
152
|
+
|
|
153
|
+
| Adapter | Description |
|
|
154
|
+
| :--- | :--- |
|
|
155
|
+
| `ollama` | Optimized for the [Ollama](https://ollama.com/) HTTP API. |
|
|
156
|
+
| `openai_compatible` | Any server implementing the OpenAI `/v1/chat/completions` contract. |
|
|
157
|
+
| `deepseek` | DeepSeek-compatible HTTP API (see [`src/adapters/deepseek.ts`](src/adapters/deepseek.ts)). |
|
|
158
|
+
|
|
159
|
+
### Optional fields (per model)
|
|
160
|
+
|
|
161
|
+
| Field | Description |
|
|
162
|
+
| :--- | :--- |
|
|
163
|
+
| `apiKey` | Secret sent to the upstream (see **Secrets** below). |
|
|
164
|
+
| `apiKeyHeader` | Header name for the API key (requires `apiKey`). |
|
|
165
|
+
| `headers` | Extra static headers as string key/value pairs. |
|
|
166
|
+
| `timeoutMs` | Upstream request timeout in milliseconds. |
|
|
167
|
+
|
|
168
|
+
### Secrets: `apiKey` and `${ENV_VAR}`
|
|
169
|
+
|
|
170
|
+
To avoid committing raw keys, you can set `apiKey` to a **single** environment placeholder: the string must be exactly `${` + the variable name + `}`, where the name uses only letters, digits, and underscores. On load, the server replaces that string with the value from `process.env`. If the variable is unset or empty, `apiKey` is dropped so the rest of the entry can still pass validation.
|
|
171
|
+
|
|
172
|
+
After changing environment variables (for example via **`PUT /admin/env`**), call **`POST /admin/reload`** so `${...}` placeholders are resolved again from the updated process environment.
|
|
173
|
+
|
|
174
|
+
---
|
|
175
|
+
|
|
176
|
+
## Web UI (dashboard)
|
|
177
|
+
|
|
178
|
+
The UI is a static SPA served from **`ui/dist`**. The server registers it **only if that directory exists**; otherwise there is no **`GET /`** redirect to the app (see [`src/server.ts`](src/server.ts)).
|
|
179
|
+
|
|
180
|
+
**Prerequisite:** run **`pnpm build:ui`** or a full **`pnpm build`** before starting the server if you want the dashboard.
|
|
181
|
+
|
|
182
|
+
**URLs**
|
|
183
|
+
|
|
184
|
+
* **`GET /`** → **302** to **`/ui/`** when the UI bundle is present.
|
|
185
|
+
* Static assets are served under **`/ui/`**.
|
|
186
|
+
|
|
187
|
+
**In-app routes** (hash-based):
|
|
188
|
+
|
|
189
|
+
* **`#/configuration`** — Edit `models.json` and environment variables through the Admin API.
|
|
190
|
+
* **`#/monitoring`** — Metrics overview with time ranges **15m**, **1h**, and **24h** (aligned with `/admin/metrics/overview`).
|
|
191
|
+
* **`#/models`** — Model list, request logs, and debug message capture.
|
|
192
|
+
|
|
193
|
+
**Security:** When the process binds to a non-loopback address, **`/admin/*`** and **`/ui/*`** are restricted to localhost clients. To use the dashboard remotely, use local access, SSH port forwarding, or a tunnel you trust.
|
|
194
|
+
|
|
195
|
+
**UI development:** There is no separate `package.json` under `ui/`; for a local Vite dev server you can run e.g. `pnpm exec vite --config ui/vite.config.ts` from the repo root. Production assets are built with **`pnpm build:ui`**.
|
|
196
|
+
|
|
197
|
+
---
|
|
198
|
+
|
|
199
|
+
## Admin API, health, and metrics
|
|
200
|
+
|
|
201
|
+
*Note: Admin routes are restricted to `localhost` when the server is bound to a non-loopback interface.*
|
|
202
|
+
|
|
203
|
+
### Public health (no admin guard)
|
|
204
|
+
|
|
205
|
+
| Method | Path | Description |
|
|
206
|
+
| :--- | :--- | :--- |
|
|
207
|
+
| `GET` | `/healthz` | Liveness: returns `{ ok: true }` while the process is running. Used by **`llm-proxy status`**. |
|
|
208
|
+
| `GET` | `/readyz` | Readiness. With **`?deep=1`**, probes each model's upstream; may return **503** if any probe fails. |
|
|
209
|
+
|
|
210
|
+
### Admin API (localhost when exposed on a public bind)
|
|
211
|
+
|
|
212
|
+
**Config and environment**
|
|
213
|
+
|
|
214
|
+
| Method | Path | Description |
|
|
215
|
+
| :--- | :--- | :--- |
|
|
216
|
+
| `GET` | `/admin/health` | Process/version hint and active config path (not the same as public `/healthz`). |
|
|
217
|
+
| `GET` | `/admin/config` | Current configuration, write target, and metadata. |
|
|
218
|
+
| `PUT` | `/admin/config` | Replace configuration (validated); writes atomically. |
|
|
219
|
+
| `POST` | `/admin/reload` | Reload `models.json` from disk. |
|
|
220
|
+
| `GET` | `/admin/env` | List relevant env keys and `.env` file metadata. |
|
|
221
|
+
| `PUT` | `/admin/env` | Apply env updates to `.env` and `process.env`. |
|
|
222
|
+
|
|
223
|
+
**Connectivity and metrics**
|
|
224
|
+
|
|
225
|
+
| Method | Path | Description |
|
|
226
|
+
| :--- | :--- | :--- |
|
|
227
|
+
| `POST` | `/admin/test-connection` | Probe a configured **`modelId`** or an inline adapter/baseUrl/model payload. |
|
|
228
|
+
| `GET` | `/admin/metrics/summary` | JSON snapshot from in-process metrics. |
|
|
229
|
+
| `GET` | `/admin/metrics/overview` | Overview for **`range=15m`**, **`1h`**, or **`24h`** (query param). |
|
|
230
|
+
|
|
231
|
+
**Logs and requests**
|
|
232
|
+
|
|
233
|
+
| Method | Path | Description |
|
|
234
|
+
| :--- | :--- | :--- |
|
|
235
|
+
| `GET` | `/admin/requests` | Recent proxy request history (`limit` query param, capped). |
|
|
236
|
+
| `GET` | `/admin/requests/:requestId` | Single request record by id. |
|
|
237
|
+
| `GET` | `/admin/logs/models` | Model-scoped logs with **`range`**, optional **`modelId`**, **`status`**, **`limit`**. |
|
|
238
|
+
| `GET` | `/admin/models/:modelId/debug/messages` | Recent captured **system** / **user** messages for debugging (`limit`, **`roles`**). |
|
|
239
|
+
|
|
240
|
+
### Prometheus
|
|
241
|
+
|
|
242
|
+
| Method | Path | Description |
|
|
243
|
+
| :--- | :--- | :--- |
|
|
244
|
+
| `GET` | `/metrics` | Prometheus text exposition format. |
|
|
245
|
+
|
|
246
|
+
---
|
|
247
|
+
|
|
248
|
+
## Security and exposure
|
|
249
|
+
|
|
250
|
+
If you intend to expose `llm-proxy` to the internet (e.g., via **ngrok** or a reverse proxy), please follow these best practices:
|
|
251
|
+
|
|
252
|
+
1. **Use HTTPS**: Always use an SSL/TLS tunnel or terminating proxy.
|
|
253
|
+
2. **Admin and UI**: The server restricts `/admin/*` and `/ui/*` to localhost when it detects a non-loopback bind.
|
|
254
|
+
3. **Upstream credentials**: Use `apiKey` / `${ENV_VAR}` in `models.json` for provider authentication.
|
|
255
|
+
|
|
256
|
+
**Example with ngrok:**
|
|
257
|
+
|
|
258
|
+
```bash
|
|
259
|
+
ngrok http 8787
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
---
|
|
263
|
+
|
|
264
|
+
## Contributing
|
|
265
|
+
|
|
266
|
+
Pull requests are welcome; please keep changes focused and consistent with existing patterns.
|
|
267
|
+
|
|
268
|
+
---
|
|
269
|
+
|
|
270
|
+
## License
|
|
271
|
+
|
|
272
|
+
Distributed under the **MIT License**. See [`LICENSE`](LICENSE) for more information.
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.createDeepseekAdapter = createDeepseekAdapter;
|
|
4
|
+
const mergeHeaders_1 = require("../config/mergeHeaders");
|
|
5
|
+
const http_1 = require("../http");
|
|
6
|
+
/**
 * Flatten a fetch `Headers`-like collection into a plain object.
 * @param {Headers} headers - collection exposing `forEach((value, key) => ...)`
 * @returns {Record<string, string>} header name → value map
 */
function headersToRecord(headers) {
    const record = {};
    headers.forEach((headerValue, headerName) => {
        record[headerName] = headerValue;
    });
    return record;
}
|
|
13
|
+
/**
 * Collapse an OpenAI-style message `content` value into plain text.
 *
 * - null/undefined → null
 * - string → returned unchanged
 * - array of content blocks → the `text` of every `{ type: "text" }`
 *   block, joined with newlines (all other blocks are ignored)
 * - anything else → `String(content)`
 *
 * @param {unknown} content - the message `content` field
 * @returns {string | null}
 */
function contentToText(content) {
    if (content == null) {
        return null;
    }
    if (typeof content === "string") {
        return content;
    }
    if (Array.isArray(content)) {
        return content
            .filter((part) => part !== null &&
                typeof part === "object" &&
                part.type === "text" &&
                typeof part.text === "string")
            .map((part) => part.text)
            .join("\n");
    }
    return String(content);
}
|
|
33
|
+
/**
 * Copy every message, replacing its `content` with the flattened text
 * produced by `contentToText`; all other message fields pass through.
 * @param {Array<object>} messages - OpenAI-style chat messages
 * @returns {Array<object>} new array of shallow-copied messages
 */
function normalizeMessages(messages) {
    return messages.map((message) => ({
        ...message,
        content: contentToText(message.content),
    }));
}
|
|
39
|
+
/**
 * Build an adapter that forwards OpenAI-style chat completion requests
 * to a DeepSeek-compatible upstream rooted at `cfg.baseUrl`.
 *
 * The client payload is forwarded with two adjustments: the model id is
 * replaced by `cfg.model`, and structured message content is collapsed
 * to plain strings via `normalizeMessages`.
 *
 * @param {object} cfg - model config (baseUrl, model, timeoutMs, header fields)
 * @returns {{ chatCompletions: Function, chatCompletionsStream: Function }}
 */
function createDeepseekAdapter(cfg) {
    const outboundHeaders = (0, mergeHeaders_1.mergeModelOutboundHeaders)(cfg);
    // Both entry points hit the same upstream route and share payload shape.
    const endpoint = () => (0, http_1.joinUrl)(cfg.baseUrl, "v1/chat/completions");
    const buildPayload = (req, extra = {}) => ({
        ...req,
        model: cfg.model,
        ...extra,
        messages: normalizeMessages(req.messages),
    });
    const requestOptions = () => ({
        headers: outboundHeaders,
        timeoutMs: cfg.timeoutMs,
    });
    return {
        // Non-streaming: relay the upstream JSON body unchanged.
        async chatCompletions(req) {
            const res = await (0, http_1.postJson)(endpoint(), buildPayload(req), requestOptions());
            return {
                status: res.status,
                headers: headersToRecord(res.headers),
                body: res.json,
            };
        },
        // Streaming: force `stream: true` and relay the raw byte stream.
        async chatCompletionsStream(req) {
            const res = await (0, http_1.postJsonStream)(endpoint(), buildPayload(req, { stream: true }), requestOptions());
            return {
                status: res.status,
                headers: headersToRecord(res.headers),
                body: res.body,
            };
        },
    };
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.createAdapter = createAdapter;
|
|
4
|
+
const deepseek_1 = require("./deepseek");
|
|
5
|
+
const ollama_1 = require("./ollama");
|
|
6
|
+
const openaiCompatible_1 = require("./openaiCompatible");
|
|
7
|
+
/**
 * Instantiate the adapter implementation named by `cfg.adapter`.
 * @param {object} cfg - model config whose `adapter` selects the backend kind
 * @returns {object} adapter exposing chatCompletions / chatCompletionsStream
 * @throws {Error} when `cfg.adapter` does not name a known adapter
 */
function createAdapter(cfg) {
    const factories = {
        deepseek: deepseek_1.createDeepseekAdapter,
        ollama: ollama_1.createOllamaAdapter,
        openai_compatible: openaiCompatible_1.createOpenAICompatibleAdapter,
    };
    // Own-key check so inherited Object.prototype names never match.
    if (!Object.hasOwn(factories, cfg.adapter)) {
        throw new Error(`Unsupported adapter: ${cfg.adapter}`);
    }
    const factory = factories[cfg.adapter];
    return factory(cfg);
}
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.createOllamaAdapter = createOllamaAdapter;
|
|
4
|
+
const mergeHeaders_1 = require("../config/mergeHeaders");
|
|
5
|
+
const http_1 = require("../http");
|
|
6
|
+
/**
 * Flatten a fetch `Headers`-like collection into a plain object.
 * @param {Headers} headers - collection exposing `forEach((value, key) => ...)`
 * @returns {Record<string, string>} header name → value map
 */
function headersToRecord(headers) {
    const record = {};
    headers.forEach((headerValue, headerName) => {
        record[headerName] = headerValue;
    });
    return record;
}
|
|
13
|
+
/**
 * Render chat messages as a single plain-text prompt for Ollama's
 * /api/generate endpoint, one "ROLE: content" line per message.
 *
 * Structured (array) content and null/undefined content are rendered
 * as an empty string; the role label stays so turn order is preserved.
 *
 * @param {{ messages: Array<{ role: string, content?: unknown }> }} req
 * @returns {string} newline-joined prompt
 */
function messagesToPrompt(req) {
    const lines = [];
    for (const message of req.messages) {
        let text = "";
        if (!Array.isArray(message.content)) {
            text = message.content ?? "";
        }
        lines.push(`${message.role.toUpperCase()}: ${text}`);
    }
    return lines.join("\n");
}
|
|
22
|
+
/**
 * Build an adapter that bridges OpenAI-style chat completion requests
 * to Ollama's native `/api/generate` endpoint at `cfg.baseUrl`.
 *
 * Requests are flattened to a single prompt via `messagesToPrompt`;
 * responses are re-shaped into OpenAI `chat.completion` objects
 * (non-streaming) or `chat.completion.chunk` SSE events (streaming).
 *
 * BUGFIX: the streaming path previously discarded any NDJSON left in
 * the line buffer when the upstream closed without a trailing newline
 * (often the final `done: true` event) and never flushed the
 * TextDecoder; both are handled after the read loop now.
 *
 * @param {object} cfg - model config (baseUrl, model, timeoutMs, header fields)
 * @returns {{ chatCompletions: Function, chatCompletionsStream: Function }}
 */
function createOllamaAdapter(cfg) {
    const outboundHeaders = (0, mergeHeaders_1.mergeModelOutboundHeaders)(cfg);
    return {
        // Non-streaming completion: one request, one OpenAI-ish JSON body.
        async chatCompletions(req) {
            const url = (0, http_1.joinUrl)(cfg.baseUrl, "api/generate");
            const body = {
                model: cfg.model,
                prompt: messagesToPrompt(req),
                stream: false,
                options: {
                    temperature: req.temperature,
                    num_predict: req.max_tokens,
                },
            };
            const res = await (0, http_1.postJson)(url, body, {
                headers: outboundHeaders,
                timeoutMs: cfg.timeoutMs,
            });
            // Convert Ollama's response into an OpenAI-ish shape so clients
            // can consistently consume `/v1/chat/completions`.
            const content = res.json && typeof res.json === "object" && "response" in res.json
                ? String(res.json.response ?? "")
                : "";
            const openaiLike = {
                id: `chatcmpl_ollama_${Date.now()}`,
                object: "chat.completion",
                created: Math.floor(Date.now() / 1000),
                model: cfg.model,
                choices: [
                    {
                        index: 0,
                        message: { role: "assistant", content },
                        finish_reason: "stop",
                    },
                ],
            };
            return {
                status: res.status,
                headers: headersToRecord(res.headers),
                body: openaiLike,
            };
        },
        // Streaming completion: translate Ollama NDJSON into OpenAI SSE.
        async chatCompletionsStream(req) {
            const url = (0, http_1.joinUrl)(cfg.baseUrl, "api/generate");
            const body = {
                model: cfg.model,
                prompt: messagesToPrompt(req),
                stream: true,
                options: {
                    temperature: req.temperature,
                    num_predict: req.max_tokens,
                },
            };
            const upstream = await (0, http_1.postJsonStream)(url, body, {
                headers: outboundHeaders,
                timeoutMs: cfg.timeoutMs,
            });
            // Ollama returns NDJSON objects per line:
            //   { response: "...", done: boolean, ... }
            // Convert to OpenAI ChatCompletions SSE (chat.completion.chunk).
            const encoder = new TextEncoder();
            const decoder = new TextDecoder();
            const stream = new ReadableStream({
                async start(controller) {
                    const id = `chatcmpl_ollama_${Date.now()}`;
                    const created = Math.floor(Date.now() / 1000);
                    let sentRole = false;
                    let buffer = "";
                    const writeSse = (data) => {
                        controller.enqueue(encoder.encode(`data: ${JSON.stringify(data)}\n\n`));
                    };
                    // Emit the SSE terminator and close the output stream.
                    const finish = () => {
                        controller.enqueue(encoder.encode("data: [DONE]\n\n"));
                        controller.close();
                    };
                    // Parse one NDJSON line and emit the matching SSE chunks.
                    // Returns true once the terminal (`done: true`) event has
                    // been sent and the stream closed.
                    const handleLine = (line) => {
                        let obj;
                        try {
                            obj = JSON.parse(line);
                        }
                        catch {
                            // Tolerate partial/garbled lines rather than abort.
                            return false;
                        }
                        if (!sentRole) {
                            // First chunk carries the assistant role, per the
                            // OpenAI streaming convention.
                            sentRole = true;
                            writeSse({
                                id,
                                object: "chat.completion.chunk",
                                created,
                                model: cfg.model,
                                choices: [{ index: 0, delta: { role: "assistant" }, finish_reason: null }],
                            });
                        }
                        const deltaText = obj && typeof obj === "object" && "response" in obj
                            ? String(obj.response ?? "")
                            : "";
                        if (deltaText) {
                            writeSse({
                                id,
                                object: "chat.completion.chunk",
                                created,
                                model: cfg.model,
                                choices: [
                                    { index: 0, delta: { content: deltaText }, finish_reason: null },
                                ],
                            });
                        }
                        if (obj?.done) {
                            writeSse({
                                id,
                                object: "chat.completion.chunk",
                                created,
                                model: cfg.model,
                                choices: [{ index: 0, delta: {}, finish_reason: "stop" }],
                            });
                            finish();
                            return true;
                        }
                        return false;
                    };
                    try {
                        if (!upstream.body) {
                            writeSse({
                                error: { message: "Upstream returned empty body" },
                            });
                            finish();
                            return;
                        }
                        const reader = upstream.body.getReader();
                        while (true) {
                            const { value, done } = await reader.read();
                            if (done)
                                break;
                            if (!value)
                                continue;
                            buffer += decoder.decode(value, { stream: true });
                            let idx;
                            while ((idx = buffer.indexOf("\n")) >= 0) {
                                const line = buffer.slice(0, idx).trim();
                                buffer = buffer.slice(idx + 1);
                                if (line && handleLine(line))
                                    return;
                            }
                        }
                        // Flush the decoder (completes any split multi-byte
                        // character) and process a final line that arrived
                        // without a trailing newline.
                        buffer += decoder.decode();
                        const tail = buffer.trim();
                        if (tail && handleLine(tail))
                            return;
                        finish();
                    }
                    catch (e) {
                        controller.enqueue(encoder.encode(`data: ${JSON.stringify({ error: { message: e?.message ?? "Stream error" } })}\n\n`));
                        finish();
                    }
                },
            });
            return {
                status: upstream.status,
                headers: {
                    "content-type": "text/event-stream; charset=utf-8",
                    "cache-control": "no-cache",
                    connection: "keep-alive",
                },
                body: stream,
            };
        },
    };
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.createOpenAICompatibleAdapter = createOpenAICompatibleAdapter;
|
|
4
|
+
const mergeHeaders_1 = require("../config/mergeHeaders");
|
|
5
|
+
const http_1 = require("../http");
|
|
6
|
+
/**
 * Flatten a fetch `Headers`-like collection into a plain object.
 * @param {Headers} headers - collection exposing `forEach((value, key) => ...)`
 * @returns {Record<string, string>} header name → value map
 */
function headersToRecord(headers) {
    const record = {};
    headers.forEach((headerValue, headerName) => {
        record[headerName] = headerValue;
    });
    return record;
}
|
|
13
|
+
/**
 * Build a pass-through adapter for any upstream implementing the
 * OpenAI `/v1/chat/completions` contract at `cfg.baseUrl`.
 *
 * The client payload is forwarded as-is except that the model id is
 * replaced with `cfg.model` (and `stream: true` is forced on the
 * streaming path).
 *
 * @param {object} cfg - model config (baseUrl, model, timeoutMs, header fields)
 * @returns {{ chatCompletions: Function, chatCompletionsStream: Function }}
 */
function createOpenAICompatibleAdapter(cfg) {
    const outboundHeaders = (0, mergeHeaders_1.mergeModelOutboundHeaders)(cfg);
    // Both entry points target the same route with the same options.
    const endpoint = () => (0, http_1.joinUrl)(cfg.baseUrl, "v1/chat/completions");
    const requestOptions = () => ({
        headers: outboundHeaders,
        timeoutMs: cfg.timeoutMs,
    });
    return {
        // Non-streaming: relay the upstream JSON body unchanged.
        async chatCompletions(req) {
            const forwarded = { ...req, model: cfg.model };
            const res = await (0, http_1.postJson)(endpoint(), forwarded, requestOptions());
            return {
                status: res.status,
                headers: headersToRecord(res.headers),
                body: res.json,
            };
        },
        // Streaming: relay the raw byte stream unchanged.
        async chatCompletionsStream(req) {
            const forwarded = { ...req, model: cfg.model, stream: true };
            const res = await (0, http_1.postJsonStream)(endpoint(), forwarded, requestOptions());
            return {
                status: res.status,
                headers: headersToRecord(res.headers),
                body: res.body,
            };
        },
    };
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.shouldEnforceLocalhostGuard = shouldEnforceLocalhostGuard;
|
|
4
|
+
exports.isLocalhostRequest = isLocalhostRequest;
|
|
5
|
+
exports.isLocalhostAddress = isLocalhostAddress;
|
|
6
|
+
exports.sendForbiddenNonLocal = sendForbiddenNonLocal;
|
|
7
|
+
/**
 * True when the server accepts remote connections (not loopback-only),
 * i.e. the per-request admin/UI localhost guard must be enforced.
 * @param {string} bindHost - host the server binds to; trimmed and
 *   compared case-insensitively
 * @returns {boolean}
 */
function shouldEnforceLocalhostGuard(bindHost) {
    const normalized = bindHost.trim().toLowerCase();
    const loopbackHosts = ["127.0.0.1", "::1", "localhost"];
    return !loopbackHosts.includes(normalized);
}
|
|
14
|
+
/**
 * True when the request's peer address is a loopback address.
 * Prefers the raw socket address; falls back to the framework-provided
 * `req.ip`, then to the empty string (which is never loopback).
 * @param {object} req - incoming request with optional `socket` / `ip`
 * @returns {boolean}
 */
function isLocalhostRequest(req) {
    const remote = req.socket?.remoteAddress ?? req.ip ?? "";
    return isLocalhostAddress(String(remote));
}
|
|
20
|
+
/**
 * True for loopback peer addresses: exactly `127.0.0.1`, `::1`, or the
 * IPv4-mapped IPv6 form `::ffff:127.0.0.1`.
 * NOTE: only the single address 127.0.0.1 is accepted, not the whole
 * 127.0.0.0/8 range — matching the original guard's strictness.
 * @param {string} addr - remote address string (may be empty)
 * @returns {boolean}
 */
function isLocalhostAddress(addr) {
    if (!addr) {
        return false;
    }
    const mappedPrefix = "::ffff:";
    const candidate = addr.startsWith(mappedPrefix)
        ? addr.slice(mappedPrefix.length)
        : addr;
    return candidate === "127.0.0.1" || addr === "::1";
}
|
|
31
|
+
/**
 * Reply with HTTP 403 explaining the admin/UI localhost restriction.
 * Returns whatever `reply.send` returns so callers can `return` it
 * directly from a handler.
 * @param {object} reply - Fastify-style reply with `code()` and `send()`
 */
function sendForbiddenNonLocal(reply) {
    const payload = {
        error: {
            message: "Admin and UI are only available from localhost when the server accepts remote connections.",
        },
    };
    return reply.code(403).send(payload);
}
|