npm - @fugood/buttress-server - Versions diffs - 2.24.2 → 2.25.0-beta.9 - Mend

@fugood/buttress-server 2.24.2 → 2.25.0-beta.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/README.md CHANGED Viewed

@@ -157,6 +157,25 @@ Examples:
   bricks-buttress --test-caps ggml-stt --test-caps-model-id BricksDisplay/whisper-ggml:ggml-small.bin
 ```
+## Compatibility Endpoints (Experimental)
+The server can expose OpenAI- and Anthropic-compatible HTTP endpoints in addition to the native RPC. Each endpoint is opt-in via the TOML config:
+```toml
+[openai_compat]
+enabled = true
+# cors_allowed_origins = "*"          # Or a list of origins; defaults to disabled
+[anthropic_messages]
+enabled = true
+# cors_allowed_origins = ["http://localhost:3000"]
+```
+| Endpoint              | Config flag                          |
+| --------------------- | ------------------------------------ |
+| `/oai-compat/v1/*`    | `[openai_compat] enabled = true`     |
+| `/anthropic-messages` | `[anthropic_messages] enabled = true` |
 ## Session State Cache
 The server supports session state caching for ggml-llm generators, which saves KV cache state to disk after completions. This enables:

package/config/sample.toml CHANGED Viewed

@@ -11,15 +11,24 @@
 # HF_TOKEN = "your_huggingface_token_here"
 # CUDA_VISIBLE_DEVICES = "0"
+[autodiscover]
+enabled = true
 [server]
 port = 2080
 log_level = "info"
 # max_body_size = "100MB"  # Supports string (e.g., "100MB", "1GB") or number in bytes
 [openai_compat]
+enabled = true
 # cors_allowed_origins = ["http://localhost:3000", "https://example.com"]  # Restrict to specific origins
 # cors_allowed_origins = "*"  # Allow all origins (default)
+[anthropic_messages]
+enabled = true
+# cors_allowed_origins = ["http://localhost:3000", "https://example.com"]
+# cors_allowed_origins = "*"
 [runtime]
 cache_dir = "./.buttress-cache"
 # huggingface_token = "hf_xx"

package/lib/index.d.mts CHANGED Viewed

@@ -229,9 +229,11 @@ type GeneratorConfig = {
 type GlobalConfig = {
   runtime?: RuntimeConfig;
   openai_compat?: {
+    enabled?: boolean;
     cors_allowed_origins?: string | string[];
   };
   anthropic_messages?: {
+    enabled?: boolean;
     cors_allowed_origins?: string | string[];
   };
 } & Record<string, any>;
@@ -277,18 +279,28 @@ type Config = {
   generators: GeneratorConfig[];
 };
 type GeneratorInfo = {
-  type: GeneratorType;
+  type: GeneratorType; /** Performance score 0–100 from buttress-hardware-guardrails. */
+  score?: number; /** Whether the host has an accelerator (GPU/Metal/etc) for this backend. */
+  hasGpu?: boolean; /** Usable memory in bytes for this backend (GPU when present, else CPU). */
+  usableBytes?: number;
 } & Record<string, any>;
 type ServerInfo = {
   id: string;
   name: string;
+  version: string;
   address: string;
   port: number;
   url: string;
   generators: GeneratorInfo[];
   authentication: {
     required: boolean;
-    type: string;
+    type: string; /** Issuer key id (when type === 'workspace-jwt'). */
+    kid?: string; /** True when buttress is paired with a workspace. */
+    bound?: boolean;
+  }; /** Workspace identity (only present when paired). */
+  workspace?: {
+    id: string;
+    name?: string;
   };
 };
 //#endregion