@fugood/llama.node 0.3.5 → 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
10 binary files changed (binary contents not shown in this diff)
package/lib/binding.ts CHANGED
@@ -9,11 +9,31 @@ export type LlamaModelOptions = {
   model: string
   embedding?: boolean
   embd_normalize?: number
-  pooling_type?: number
+  pooling_type?: 'none' | 'mean' | 'cls' | 'last' | 'rank'
   n_ctx?: number
   n_batch?: number
+  n_ubatch?: number
   n_threads?: number
   n_gpu_layers?: number
+  flash_attn?: boolean
+  cache_type_k?:
+    | 'f16'
+    | 'f32'
+    | 'q8_0'
+    | 'q4_0'
+    | 'q4_1'
+    | 'iq4_nl'
+    | 'q5_0'
+    | 'q5_1'
+  cache_type_v?:
+    | 'f16'
+    | 'f32'
+    | 'q8_0'
+    | 'q4_0'
+    | 'q4_1'
+    | 'iq4_nl'
+    | 'q5_0'
+    | 'q5_1'
   use_mlock?: boolean
   use_mmap?: boolean
   vocab_only?: boolean
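The binding.ts change above replaces the old numeric `pooling_type` with a string union and adds `n_ubatch`, `flash_attn`, and the KV-cache type options. A minimal sketch of an options object using the new fields; the import path is an assumption for illustration, only the field names and value types come from the diff:

```ts
// Assumed import path; LlamaModelOptions is declared in lib/binding.ts
import type { LlamaModelOptions } from '@fugood/llama.node/lib/binding'

const options: LlamaModelOptions = {
  model: '/path/to/model.gguf',
  n_ctx: 4096,
  n_batch: 2048,
  n_ubatch: 512,         // new in 0.3.6
  flash_attn: true,      // new in 0.3.6
  cache_type_k: 'q8_0',  // new in 0.3.6: quantized K cache
  cache_type_v: 'q8_0',  // new in 0.3.6: quantized V cache
  pooling_type: 'mean',  // now a string union instead of a number
}
```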
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "0.3.5",
+  "version": "0.3.6",
   "description": "Llama.cpp for Node.js",
   "main": "lib/index.js",
   "scripts": {
@@ -1,3 +1,4 @@
+#include "ggml.h"
 #include "LlamaContext.h"
 #include "DetokenizeWorker.h"
 #include "DisposeWorker.h"
@@ -60,6 +61,36 @@ void LlamaContext::Init(Napi::Env env, Napi::Object &exports) {
   exports.Set("LlamaContext", func);
 }
 
+const std::vector<ggml_type> kv_cache_types = {
+  GGML_TYPE_F32,
+  GGML_TYPE_F16,
+  GGML_TYPE_BF16,
+  GGML_TYPE_Q8_0,
+  GGML_TYPE_Q4_0,
+  GGML_TYPE_Q4_1,
+  GGML_TYPE_IQ4_NL,
+  GGML_TYPE_Q5_0,
+  GGML_TYPE_Q5_1,
+};
+
+static ggml_type kv_cache_type_from_str(const std::string & s) {
+  for (const auto & type : kv_cache_types) {
+    if (ggml_type_name(type) == s) {
+      return type;
+    }
+  }
+  throw std::runtime_error("Unsupported cache type: " + s);
+}
+
+static int32_t pooling_type_from_str(const std::string & s) {
+  if (s == "none") return LLAMA_POOLING_TYPE_NONE;
+  if (s == "mean") return LLAMA_POOLING_TYPE_MEAN;
+  if (s == "cls") return LLAMA_POOLING_TYPE_CLS;
+  if (s == "last") return LLAMA_POOLING_TYPE_LAST;
+  if (s == "rank") return LLAMA_POOLING_TYPE_RANK;
+  return LLAMA_POOLING_TYPE_UNSPECIFIED;
+}
+
 // construct({ model, embedding, n_ctx, n_batch, n_threads, n_gpu_layers,
 //             use_mlock, use_mmap }): LlamaContext throws error
 LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
@@ -83,18 +114,24 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
 
   params.n_ctx = get_option<int32_t>(options, "n_ctx", 512);
   params.n_batch = get_option<int32_t>(options, "n_batch", 2048);
+  params.n_ubatch = get_option<int32_t>(options, "n_ubatch", 512);
   params.embedding = get_option<bool>(options, "embedding", false);
   if (params.embedding) {
     // For non-causal models, batch size must be equal to ubatch size
     params.n_ubatch = params.n_batch;
   }
   params.embd_normalize = get_option<int32_t>(options, "embd_normalize", 2);
-  int32_t pooling_type = get_option<int32_t>(options, "pooling_type", -1);
-  params.pooling_type = (enum llama_pooling_type) pooling_type;
+  params.pooling_type = (enum llama_pooling_type) pooling_type_from_str(
+    get_option<std::string>(options, "pooling_type", "").c_str()
+  );
 
   params.cpuparams.n_threads =
       get_option<int32_t>(options, "n_threads", cpu_get_num_math() / 2);
   params.n_gpu_layers = get_option<int32_t>(options, "n_gpu_layers", -1);
+  params.flash_attn = get_option<bool>(options, "flash_attn", false);
+  params.cache_type_k = kv_cache_type_from_str(get_option<std::string>(options, "cache_type_k", "f16").c_str());
+  params.cache_type_v = kv_cache_type_from_str(get_option<std::string>(options, "cache_type_v", "f16").c_str());
+
   params.use_mlock = get_option<bool>(options, "use_mlock", false);
   params.use_mmap = get_option<bool>(options, "use_mmap", true);
   params.numa =
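Putting the constructor changes together: `cache_type_k` / `cache_type_v` default to "f16" and any string outside the list above makes construction fail with "Unsupported cache type: ...", while an empty or unrecognized `pooling_type` falls back to LLAMA_POOLING_TYPE_UNSPECIFIED. A hedged usage sketch from the JavaScript side; `loadModel` is assumed to be the package's entry point and is not part of this diff, only the option names and defaults come from the code above:

```ts
import { loadModel } from '@fugood/llama.node' // assumed entry point, not shown in this diff

async function main() {
  const context = await loadModel({
    model: '/path/to/model.gguf',
    n_gpu_layers: 99,
    flash_attn: true,      // in llama.cpp, a quantized V cache generally requires flash attention
    cache_type_k: 'q8_0',  // defaults to 'f16'; an unsupported string throws on construction
    cache_type_v: 'q8_0',
  })
  // ... use the context as before; the remaining options are unchanged from 0.3.5
}

main()
```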