@fugood/buttress-server 2.23.0 → 2.23.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,9 +11,6 @@
11
11
  # HF_TOKEN = "your_huggingface_token_here"
12
12
  # CUDA_VISIBLE_DEVICES = "0"
13
13
 
14
- [autodiscover.http]
15
- enabled = true
16
-
17
14
  [server]
18
15
  port = 2080
19
16
  log_level = "info"
@@ -101,6 +98,19 @@ repo_id = "unsloth/Nemotron-3-Nano-30B-A3B-GGUF"
101
98
  quantization = "q4_0"
102
99
  n_ctx = 51200
103
100
 
101
+ [[generators]]
102
+ type = "ggml-llm"
103
+ [generators.backend]
104
+ variant = "default"
105
+ variant_preference = ["cuda", "vulkan", "snapdragon", "default"]
106
+ gpu_memory_fraction = 0.8
107
+ cpu_memory_fraction = 0.8
108
+ [generators.model]
109
+ repo_id = "DevQuasar/MiniMaxAI.MiniMax-M2.5-GGUF"
110
+ quantization = "q2_k"
111
+ n_ctx = 64000
112
+ kv_unified = true
113
+
104
114
  [[generators]]
105
115
  type = "ggml-llm"
106
116
  [generators.backend]