@fugood/buttress-server 2.24.2 → 2.25.0-beta.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -157,6 +157,25 @@ Examples:
157
157
  bricks-buttress --test-caps ggml-stt --test-caps-model-id BricksDisplay/whisper-ggml:ggml-small.bin
158
158
  ```
159
159
 
160
+ ## Compatibility Endpoints (Experimental)
161
+
162
+ The server can expose OpenAI- and Anthropic-compatible HTTP endpoints in addition to the native RPC. Each endpoint is opt-in via the TOML config:
163
+
164
+ ```toml
165
+ [openai_compat]
166
+ enabled = true
167
+ # cors_allowed_origins = "*" # Or a list of origins; CORS is disabled when this is unset
168
+
169
+ [anthropic_messages]
170
+ enabled = true
171
+ # cors_allowed_origins = ["http://localhost:3000"]
172
+ ```
173
+
174
+ | Endpoint | Config flag |
175
+ | --------------------- | ------------------------------------ |
176
+ | `/oai-compat/v1/*` | `[openai_compat] enabled = true` |
177
+ | `/anthropic-messages` | `[anthropic_messages] enabled = true` |
178
+
160
179
  ## Session State Cache
161
180
 
162
181
  The server supports session state caching for ggml-llm generators, which saves KV cache state to disk after completions. This enables:
@@ -11,15 +11,24 @@
11
11
  # HF_TOKEN = "your_huggingface_token_here"
12
12
  # CUDA_VISIBLE_DEVICES = "0"
13
13
 
14
+ [autodiscover]
15
+ enabled = true
16
+
14
17
  [server]
15
18
  port = 2080
16
19
  log_level = "info"
17
20
  # max_body_size = "100MB" # Supports string (e.g., "100MB", "1GB") or number in bytes
18
21
 
19
22
  [openai_compat]
23
+ enabled = true
20
24
  # cors_allowed_origins = ["http://localhost:3000", "https://example.com"] # Restrict to specific origins
21
25
  # cors_allowed_origins = "*" # Allow all origins (default)
22
26
 
27
+ [anthropic_messages]
28
+ enabled = true
29
+ # cors_allowed_origins = ["http://localhost:3000", "https://example.com"] # Restrict to specific origins
30
+ # cors_allowed_origins = "*" # Allow all origins
31
+
23
32
  [runtime]
24
33
  cache_dir = "./.buttress-cache"
25
34
  # huggingface_token = "hf_xx"
package/lib/index.d.mts CHANGED
@@ -229,9 +229,11 @@ type GeneratorConfig = {
229
229
  type GlobalConfig = {
230
230
  runtime?: RuntimeConfig;
231
231
  openai_compat?: {
232
+ /** Opt-in switch for the OpenAI-compatible HTTP endpoints served under /oai-compat/v1/. */ enabled?: boolean;
232
233
  /** Either "*" or a list of allowed origins. */ cors_allowed_origins?: string | string[];
233
234
  };
234
235
  anthropic_messages?: {
236
+ /** Opt-in switch for the Anthropic-compatible HTTP endpoint served at /anthropic-messages. */ enabled?: boolean;
235
237
  /** Either "*" or a list of allowed origins. */ cors_allowed_origins?: string | string[];
236
238
  };
237
239
  } & Record<string, any>;
@@ -277,18 +279,28 @@ type Config = {
277
279
  generators: GeneratorConfig[];
278
280
  };
279
281
  type GeneratorInfo = {
280
- type: GeneratorType;
282
+ type: GeneratorType;
283
+ /** Performance score 0–100 from buttress-hardware-guardrails. */ score?: number;
284
+ /** Whether the host has an accelerator (GPU/Metal/etc) for this backend. */ hasGpu?: boolean;
285
+ /** Usable memory in bytes for this backend (GPU when present, else CPU). */ usableBytes?: number;
281
286
  } & Record<string, any>;
282
287
  type ServerInfo = {
283
288
  id: string;
284
289
  name: string;
290
+ version: string;
285
291
  address: string;
286
292
  port: number;
287
293
  url: string;
288
294
  generators: GeneratorInfo[];
289
295
  authentication: {
290
296
  required: boolean;
291
- type: string;
297
+ type: string;
298
+ /** Issuer key id (when type === 'workspace-jwt'). */ kid?: string;
299
+ /** True when buttress is paired with a workspace. */ bound?: boolean;
300
+ };
301
+ /** Workspace identity (only present when paired). */ workspace?: {
302
+ id: string;
303
+ name?: string;
292
304
  };
293
305
  };
294
306
  //#endregion