@fugood/buttress-server 2.24.2 → 2.25.0-beta.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -0
- package/config/sample.toml +9 -0
- package/lib/index.d.mts +14 -2
- package/lib/index.mjs +22 -24
- package/package.json +3 -2
- package/public/lib/index.d.ts +27 -0
- package/public/lib/index.mjs +110 -0
package/README.md
CHANGED
|
@@ -157,6 +157,25 @@ Examples:
|
|
|
157
157
|
bricks-buttress --test-caps ggml-stt --test-caps-model-id BricksDisplay/whisper-ggml:ggml-small.bin
|
|
158
158
|
```
|
|
159
159
|
|
|
160
|
+
## Compatibility Endpoints (Experimental)
|
|
161
|
+
|
|
162
|
+
The server can expose OpenAI- and Anthropic-compatible HTTP endpoints in addition to the native RPC. Each endpoint is opt-in via the TOML config:
|
|
163
|
+
|
|
164
|
+
```toml
|
|
165
|
+
[openai_compat]
|
|
166
|
+
enabled = true
|
|
167
|
+
# cors_allowed_origins = "*" # Or a list of origins; defaults to disabled
|
|
168
|
+
|
|
169
|
+
[anthropic_messages]
|
|
170
|
+
enabled = true
|
|
171
|
+
# cors_allowed_origins = ["http://localhost:3000"]
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
| Endpoint | Config flag |
|
|
175
|
+
| --------------------- | ------------------------------------ |
|
|
176
|
+
| `/oai-compat/v1/*` | `[openai_compat] enabled = true` |
|
|
177
|
+
| `/anthropic-messages` | `[anthropic_messages] enabled = true` |
|
|
178
|
+
|
|
160
179
|
## Session State Cache
|
|
161
180
|
|
|
162
181
|
The server supports session state caching for ggml-llm generators, which saves KV cache state to disk after completions. This enables:
|
package/config/sample.toml
CHANGED
|
@@ -11,15 +11,24 @@
|
|
|
11
11
|
# HF_TOKEN = "your_huggingface_token_here"
|
|
12
12
|
# CUDA_VISIBLE_DEVICES = "0"
|
|
13
13
|
|
|
14
|
+
[autodiscover]
|
|
15
|
+
enabled = true
|
|
16
|
+
|
|
14
17
|
[server]
|
|
15
18
|
port = 2080
|
|
16
19
|
log_level = "info"
|
|
17
20
|
# max_body_size = "100MB" # Supports string (e.g., "100MB", "1GB") or number in bytes
|
|
18
21
|
|
|
19
22
|
[openai_compat]
|
|
23
|
+
enabled = true
|
|
20
24
|
# cors_allowed_origins = ["http://localhost:3000", "https://example.com"] # Restrict to specific origins
|
|
21
25
|
# cors_allowed_origins = "*" # Allow all origins (default)
|
|
22
26
|
|
|
27
|
+
[anthropic_messages]
|
|
28
|
+
enabled = true
|
|
29
|
+
# cors_allowed_origins = ["http://localhost:3000", "https://example.com"] # Restrict to specific origins
|
|
30
|
+
# cors_allowed_origins = "*" # Allow all origins
|
|
31
|
+
|
|
23
32
|
[runtime]
|
|
24
33
|
cache_dir = "./.buttress-cache"
|
|
25
34
|
# huggingface_token = "hf_xx"
|
package/lib/index.d.mts
CHANGED
|
@@ -229,9 +229,11 @@ type GeneratorConfig = {
|
|
|
229
229
|
type GlobalConfig = {
|
|
230
230
|
runtime?: RuntimeConfig;
|
|
231
231
|
openai_compat?: {
|
|
232
|
+
enabled?: boolean;
|
|
232
233
|
cors_allowed_origins?: string | string[];
|
|
233
234
|
};
|
|
234
235
|
anthropic_messages?: {
|
|
236
|
+
enabled?: boolean;
|
|
235
237
|
cors_allowed_origins?: string | string[];
|
|
236
238
|
};
|
|
237
239
|
} & Record<string, any>;
|
|
@@ -277,18 +279,28 @@ type Config = {
|
|
|
277
279
|
generators: GeneratorConfig[];
|
|
278
280
|
};
|
|
279
281
|
type GeneratorInfo = {
|
|
280
|
-
type: GeneratorType;
|
|
282
|
+
type: GeneratorType;
|
|
283
|
+
/** Performance score 0–100 from buttress-hardware-guardrails. */
score?: number;
|
|
284
|
+
/** Whether the host has an accelerator (GPU/Metal/etc) for this backend. */
hasGpu?: boolean;
|
|
285
|
+
/** Usable memory in bytes for this backend (GPU when present, else CPU). */
usableBytes?: number;
|
|
281
286
|
} & Record<string, any>;
|
|
282
287
|
type ServerInfo = {
|
|
283
288
|
id: string;
|
|
284
289
|
name: string;
|
|
290
|
+
version: string;
|
|
285
291
|
address: string;
|
|
286
292
|
port: number;
|
|
287
293
|
url: string;
|
|
288
294
|
generators: GeneratorInfo[];
|
|
289
295
|
authentication: {
|
|
290
296
|
required: boolean;
|
|
291
|
-
type: string;
|
|
297
|
+
type: string;
|
|
298
|
+
/** Issuer key id (when type === 'workspace-jwt'). */
kid?: string;
|
|
299
|
+
/** True when buttress is paired with a workspace. */
bound?: boolean;
|
|
300
|
+
};
|
|
301
|
+
/** Workspace identity (only present when paired). */
workspace?: {
|
|
302
|
+
id: string;
|
|
303
|
+
name?: string;
|
|
292
304
|
};
|
|
293
305
|
};
|
|
294
306
|
//#endregion
|