@fugood/buttress-server 2.24.2 → 2.25.0-beta.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -20,6 +20,84 @@ npx bricks-buttress --config ./config.toml
20
20
  npx bricks-buttress
21
21
  ```
22
22
 
23
+ ## Workspace Binding (`bricks buttress`)
24
+
25
+ By default, a buttress-server runs in **public mode**: any client on the LAN can connect, no auth required. To restrict access to a single BRICKS workspace and enable workspace-scoped JWT auth, **bind** the server with the `bricks buttress` CLI commands. Once bound, the server only accepts WebSocket / file-transfer requests carrying a valid access token signed by that workspace's issuer.
26
+
27
+ The `bricks` CLI is the tool that performs the binding and writes the local state file. Install it first — see the [bricks-cli docs](https://docs.bricks.tools/cli) — then run `bricks auth login` with the workspace owner's account before running the commands below.
28
+
29
+ ### Bind a server to a workspace
30
+
31
+ ```bash
32
+ # Pair the local machine's buttress-server with the workspace of the current bricks-cli profile
33
+ bricks buttress bind
34
+
35
+ # Override the auto-detected server id, give it a friendly name, or write to a custom state dir
36
+ bricks buttress bind --server-id buttress-mac-studio --name "Studio LLM" --state-dir /etc/buttress
37
+
38
+ # For headless/remote setups: emit state.json to stdout instead of writing to disk
39
+ bricks buttress bind --print > /etc/buttress/state.json
40
+ ```
41
+
42
+ The state file (`~/.bricks-cli/buttress/state.json` by default, or `state.json` inside `$BRICKS_BUTTRESS_STATE_DIR` when that variable is set) stores:
43
+
44
+ - `workspace.id` / `workspace.name` — which workspace this server belongs to
45
+ - `workspace.serverId` — the server's stable id (defaults to `buttress-<machineId>`)
46
+ - `workspace.issuerPublicKey` + `workspace.kid` — the issuer's Ed25519 public key (SPKI) and its key id, used to verify access tokens
47
+
48
+ **Restart `bricks-buttress` after binding** for the change to take effect — the state file is read once at startup.
49
+
50
+ ### Inspect bindings
51
+
52
+ ```bash
53
+ # Show local state.json + the workspace-side bound list
54
+ bricks buttress status
55
+
56
+ # Same, JSON-formatted
57
+ bricks buttress status --json
58
+ ```
59
+
60
+ ### Discover servers on the LAN
61
+
62
+ ```bash
63
+ # UDP scan + HTTP /buttress/info verification (3s timeout by default)
64
+ bricks buttress scan
65
+
66
+ # UDP only (skip the /buttress/info round-trip)
67
+ bricks buttress scan --udp-only
68
+
69
+ # Machine-readable
70
+ bricks buttress scan --json
71
+ ```
72
+
73
+ `scan` lists every buttress-server visible on the LAN, including unbound (public) ones, with their version, auth state (`open` vs `JWT required` + kid), bound workspace, and per-generator hardware caps (`score`, GPU, usable memory). Servers whose workspace matches your current `bricks-cli` profile are highlighted; this is purely a discovery command and does not mint any tokens.
74
+
75
+ ### Unbind
76
+
77
+ ```bash
78
+ # Remove the binding from the workspace and delete the local state.json
79
+ bricks buttress unbind
80
+
81
+ # Keep the local state file (useful if you only want to revoke server-side)
82
+ bricks buttress unbind --keep-local
83
+ ```
84
+
85
+ After unbinding, restart the server to return it to public mode.
86
+
87
+ ### Issue a long-lived access token
88
+
89
+ For headless callers (CI, ctor agents) that already hold a workspace token, mint a long-lived buttress access token instead of relying on a per-launcher session token:
90
+
91
+ ```bash
92
+ # Default 30-day TTL
93
+ bricks buttress issue-token
94
+
95
+ # Custom TTL (seconds), JSON output for scripting
96
+ bricks buttress issue-token --ttl 3600 --json
97
+ ```
98
+
99
+ The token carries the claims `{ k: 'ba', w_id, st: 'ws', sid, jti, exp }`, and any buttress-server bound to the same workspace will accept it.
100
+
23
101
  ## Configuration
24
102
 
25
103
  Configuration can be provided via:
@@ -157,6 +235,25 @@ Examples:
157
235
  bricks-buttress --test-caps ggml-stt --test-caps-model-id BricksDisplay/whisper-ggml:ggml-small.bin
158
236
  ```
159
237
 
238
+ ## Compatibility Endpoints (Experimental)
239
+
240
+ The server can expose OpenAI- and Anthropic-compatible HTTP endpoints in addition to the native RPC. Each endpoint is opt-in via the TOML config:
241
+
242
+ ```toml
243
+ [openai_compat]
244
+ enabled = true
245
+ # cors_allowed_origins = "*" # Or a list of origins; defaults to disabled
246
+
247
+ [anthropic_messages]
248
+ enabled = true
249
+ # cors_allowed_origins = ["http://localhost:3000"]
250
+ ```
251
+
252
+ | Endpoint | Config flag |
253
+ | --------------------- | ------------------------------------ |
254
+ | `/oai-compat/v1/*` | `[openai_compat] enabled = true` |
255
+ | `/anthropic-messages` | `[anthropic_messages] enabled = true` |
256
+
160
257
  ## Session State Cache
161
258
 
162
259
  The server supports session state caching for ggml-llm generators, which saves KV cache state to disk after completions. This enables:
@@ -11,15 +11,24 @@
11
11
  # HF_TOKEN = "your_huggingface_token_here"
12
12
  # CUDA_VISIBLE_DEVICES = "0"
13
13
 
14
+ [autodiscover]
15
+ enabled = true
16
+
14
17
  [server]
15
18
  port = 2080
16
19
  log_level = "info"
17
20
  # max_body_size = "100MB" # Supports string (e.g., "100MB", "1GB") or number in bytes
18
21
 
19
22
  [openai_compat]
23
+ enabled = true
20
24
  # cors_allowed_origins = ["http://localhost:3000", "https://example.com"] # Restrict to specific origins
21
25
  # cors_allowed_origins = "*" # Allow all origins (default)
22
26
 
27
+ [anthropic_messages]
28
+ enabled = true
29
+ # cors_allowed_origins = ["http://localhost:3000", "https://example.com"]
30
+ # cors_allowed_origins = "*"
31
+
23
32
  [runtime]
24
33
  cache_dir = "./.buttress-cache"
25
34
  # huggingface_token = "hf_xx"
package/lib/index.d.mts CHANGED
@@ -1,5 +1,6 @@
1
1
 
2
2
  import { AnyElysia, Elysia } from "elysia";
3
+ import crypto from "node:crypto";
3
4
  import { ReadableStream } from "node:stream/web";
4
5
  import { EventEmitter } from "node:events";
5
6
 
@@ -229,9 +230,11 @@ type GeneratorConfig = {
229
230
  type GlobalConfig = {
230
231
  runtime?: RuntimeConfig;
231
232
  openai_compat?: {
233
+ enabled?: boolean;
232
234
  cors_allowed_origins?: string | string[];
233
235
  };
234
236
  anthropic_messages?: {
237
+ enabled?: boolean;
235
238
  cors_allowed_origins?: string | string[];
236
239
  };
237
240
  } & Record<string, any>;
@@ -277,24 +280,40 @@ type Config = {
277
280
  generators: GeneratorConfig[];
278
281
  };
279
282
  type GeneratorInfo = {
280
- type: GeneratorType;
283
+ type: GeneratorType; /** Performance score 0–100 from buttress-hardware-guardrails. */
284
+ score?: number; /** Whether the host has an accelerator (GPU/Metal/etc) for this backend. */
285
+ hasGpu?: boolean; /** Usable memory in bytes for this backend (GPU when present, else CPU). */
286
+ usableBytes?: number;
281
287
  } & Record<string, any>;
282
288
  type ServerInfo = {
283
289
  id: string;
284
290
  name: string;
291
+ version: string;
285
292
  address: string;
286
293
  port: number;
287
294
  url: string;
288
295
  generators: GeneratorInfo[];
289
296
  authentication: {
290
297
  required: boolean;
291
- type: string;
298
+ type: string; /** Issuer key id (when type === 'workspace-jwt'). */
299
+ kid?: string; /** True when buttress is paired with a workspace. */
300
+ bound?: boolean;
301
+ }; /** Workspace identity (only present when paired). */
302
+ workspace?: {
303
+ id: string;
304
+ name?: string;
292
305
  };
293
306
  };
294
307
  //#endregion
295
308
  //#region src/autodiscover/types.d.ts
296
309
  type GetServerInfoFn = () => ServerInfo;
297
310
  //#endregion
311
+ //#region src/autodiscover/udp.d.ts
312
+ interface AnnounceSigner {
313
+ kid: string;
314
+ privateKey: crypto.KeyObject;
315
+ }
316
+ //#endregion
298
317
  //#region src/autodiscover/index.d.ts
299
318
  /**
300
319
  * Autodiscover service that manages discovery transports.
@@ -304,9 +323,10 @@ type GetServerInfoFn = () => ServerInfo;
304
323
  declare class AutodiscoverService {
305
324
  private config;
306
325
  private getServerInfo;
326
+ private signer;
307
327
  private transports;
308
328
  private started;
309
- constructor(config: AutodiscoverConfig, getServerInfo: GetServerInfoFn);
329
+ constructor(config: AutodiscoverConfig, getServerInfo: GetServerInfoFn, signer: AnnounceSigner | null);
310
330
  start(): Promise<void>;
311
331
  stop(): Promise<void>;
312
332
  }