@fugood/buttress-server 2.23.0-beta.52 → 2.23.0-beta.54

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -43,6 +43,18 @@ enabled = true
43
43
  max_size_bytes = "10GB" # Supports string (e.g., "10GB", "500MB") or number
44
44
  max_entries = 1000
45
45
 
46
+ [[generators]]
47
+ type = "ggml-llm"
48
+ [generators.backend]
49
+ variant = "default"
50
+ variant_preference = ["cuda", "vulkan", "snapdragon", "default"]
51
+ gpu_memory_fraction = 0.8
52
+ cpu_memory_fraction = 0.8
53
+ [generators.model]
54
+ repo_id = "unsloth/GLM-4.7-Flash-GGUF"
55
+ quantization = "q4_0"
56
+ n_ctx = 51200
57
+
46
58
  [[generators]]
47
59
  type = "ggml-llm"
48
60
  [generators.backend]