@fugood/llama.node 0.3.14 → 0.3.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/package.json +1 -1
- package/src/llama.cpp/.github/workflows/build.yml +30 -1
- package/src/llama.cpp/CMakeLists.txt +9 -1
- package/src/llama.cpp/cmake/common.cmake +2 -0
- package/src/llama.cpp/common/arg.cpp +20 -2
- package/src/llama.cpp/common/common.cpp +6 -3
- package/src/llama.cpp/common/speculative.cpp +4 -4
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +2 -2
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +1 -1
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +2 -2
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +1 -1
- package/src/llama.cpp/examples/infill/infill.cpp +2 -2
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +2 -2
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +4 -4
- package/src/llama.cpp/examples/llava/gemma3-cli.cpp +1 -1
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +6 -6
- package/src/llama.cpp/examples/lookup/lookup.cpp +1 -1
- package/src/llama.cpp/examples/main/main.cpp +6 -6
- package/src/llama.cpp/examples/parallel/parallel.cpp +5 -5
- package/src/llama.cpp/examples/passkey/passkey.cpp +14 -14
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +6 -6
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +2 -2
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +1 -1
- package/src/llama.cpp/examples/run/run.cpp +91 -46
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +2 -2
- package/src/llama.cpp/examples/server/server.cpp +37 -15
- package/src/llama.cpp/examples/server/utils.hpp +3 -1
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +2 -2
- package/src/llama.cpp/examples/speculative/speculative.cpp +14 -14
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
- package/src/llama.cpp/examples/tts/tts.cpp +20 -9
- package/src/llama.cpp/ggml/CMakeLists.txt +1 -0
- package/src/llama.cpp/ggml/cmake/common.cmake +26 -0
- package/src/llama.cpp/ggml/include/ggml.h +24 -0
- package/src/llama.cpp/ggml/src/CMakeLists.txt +10 -28
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +6 -2
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +0 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +15 -7
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +1493 -12
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +150 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +284 -29
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +2 -1
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +3 -1
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +7 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +0 -4
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +95 -22
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +35 -12
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +93 -27
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +12 -13
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +40 -40
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +12 -43
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +109 -40
- package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +0 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +19 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +114 -6
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +6 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +305 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv.hpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +398 -158
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +0 -4
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +7 -2
- package/src/llama.cpp/ggml/src/ggml.c +85 -2
- package/src/llama.cpp/include/llama.h +86 -22
- package/src/llama.cpp/src/CMakeLists.txt +5 -2
- package/src/llama.cpp/src/llama-adapter.cpp +19 -20
- package/src/llama.cpp/src/llama-adapter.h +11 -9
- package/src/llama.cpp/src/llama-arch.cpp +103 -16
- package/src/llama.cpp/src/llama-arch.h +18 -0
- package/src/llama.cpp/src/llama-batch.h +2 -2
- package/src/llama.cpp/src/llama-context.cpp +2253 -1222
- package/src/llama.cpp/src/llama-context.h +214 -77
- package/src/llama.cpp/src/llama-cparams.h +1 -0
- package/src/llama.cpp/src/llama-graph.cpp +1662 -0
- package/src/llama.cpp/src/llama-graph.h +574 -0
- package/src/llama.cpp/src/llama-hparams.cpp +8 -0
- package/src/llama.cpp/src/llama-hparams.h +9 -0
- package/src/llama.cpp/src/llama-io.cpp +15 -0
- package/src/llama.cpp/src/llama-io.h +35 -0
- package/src/llama.cpp/src/llama-kv-cache.cpp +1006 -291
- package/src/llama.cpp/src/llama-kv-cache.h +178 -110
- package/src/llama.cpp/src/llama-memory.cpp +1 -0
- package/src/llama.cpp/src/llama-memory.h +21 -0
- package/src/llama.cpp/src/llama-model.cpp +8244 -173
- package/src/llama.cpp/src/llama-model.h +34 -1
- package/src/llama.cpp/src/llama-quant.cpp +10 -1
- package/src/llama.cpp/src/llama.cpp +51 -9984
- package/src/llama.cpp/tests/test-backend-ops.cpp +145 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +0 -143
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +0 -9
|
@@ -2,7 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
#include "llama.h"
|
|
4
4
|
#include "llama-arch.h"
|
|
5
|
+
#include "llama-graph.h"
|
|
5
6
|
#include "llama-hparams.h"
|
|
7
|
+
#include "llama-memory.h"
|
|
6
8
|
#include "llama-vocab.h"
|
|
7
9
|
|
|
8
10
|
#include <memory>
|
|
@@ -10,6 +12,8 @@
|
|
|
10
12
|
#include <unordered_map>
|
|
11
13
|
#include <vector>
|
|
12
14
|
|
|
15
|
+
struct llama_cparams;
|
|
16
|
+
struct llama_ubatch;
|
|
13
17
|
struct llama_model_loader;
|
|
14
18
|
|
|
15
19
|
// available models
|
|
@@ -25,6 +29,7 @@ enum llm_type {
|
|
|
25
29
|
LLM_TYPE_109M,
|
|
26
30
|
LLM_TYPE_137M,
|
|
27
31
|
LLM_TYPE_160M,
|
|
32
|
+
LLM_TYPE_190M,
|
|
28
33
|
LLM_TYPE_220M,
|
|
29
34
|
LLM_TYPE_250M,
|
|
30
35
|
LLM_TYPE_270M,
|
|
@@ -41,6 +46,7 @@ enum llm_type {
|
|
|
41
46
|
LLM_TYPE_1_6B,
|
|
42
47
|
LLM_TYPE_2B,
|
|
43
48
|
LLM_TYPE_2_8B,
|
|
49
|
+
LLM_TYPE_2_9B,
|
|
44
50
|
LLM_TYPE_3B,
|
|
45
51
|
LLM_TYPE_4B,
|
|
46
52
|
LLM_TYPE_6B,
|
|
@@ -256,6 +262,20 @@ struct llama_layer {
|
|
|
256
262
|
struct ggml_tensor * time_mix_receptance_b = nullptr;
|
|
257
263
|
struct ggml_tensor * time_mix_gate = nullptr;
|
|
258
264
|
|
|
265
|
+
// rwkv7
|
|
266
|
+
struct ggml_tensor * time_mix_w0 = nullptr;
|
|
267
|
+
struct ggml_tensor * time_mix_a0 = nullptr;
|
|
268
|
+
struct ggml_tensor * time_mix_a1 = nullptr;
|
|
269
|
+
struct ggml_tensor * time_mix_a2 = nullptr;
|
|
270
|
+
struct ggml_tensor * time_mix_v0 = nullptr;
|
|
271
|
+
struct ggml_tensor * time_mix_v1 = nullptr;
|
|
272
|
+
struct ggml_tensor * time_mix_v2 = nullptr;
|
|
273
|
+
struct ggml_tensor * time_mix_g1 = nullptr;
|
|
274
|
+
struct ggml_tensor * time_mix_g2 = nullptr;
|
|
275
|
+
struct ggml_tensor * time_mix_k_k = nullptr;
|
|
276
|
+
struct ggml_tensor * time_mix_k_a = nullptr;
|
|
277
|
+
struct ggml_tensor * time_mix_r_k = nullptr;
|
|
278
|
+
|
|
259
279
|
struct ggml_tensor * time_mix_ln = nullptr;
|
|
260
280
|
struct ggml_tensor * time_mix_ln_b = nullptr;
|
|
261
281
|
struct ggml_tensor * time_mix_output = nullptr;
|
|
@@ -347,7 +367,7 @@ struct llama_model {
|
|
|
347
367
|
std::string desc() const;
|
|
348
368
|
|
|
349
369
|
size_t size() const;
|
|
350
|
-
size_t max_nodes() const;
|
|
370
|
+
size_t n_tensors() const;
|
|
351
371
|
size_t n_devices() const;
|
|
352
372
|
|
|
353
373
|
// total number of parameters in the model
|
|
@@ -362,9 +382,22 @@ struct llama_model {
|
|
|
362
382
|
|
|
363
383
|
const struct ggml_tensor * get_tensor(const char * name) const;
|
|
364
384
|
|
|
385
|
+
// TODO: move this to new llm_arch_model_i interface
|
|
386
|
+
llama_memory_i * create_memory() const; // TODO: params
|
|
387
|
+
|
|
388
|
+
// TODO: move this to new llm_arch_model_i interface
|
|
389
|
+
llm_graph_result_ptr build_graph(
|
|
390
|
+
const llm_graph_params & params,
|
|
391
|
+
ggml_cgraph * gf,
|
|
392
|
+
llm_graph_type type) const;
|
|
393
|
+
|
|
365
394
|
private:
|
|
366
395
|
struct impl;
|
|
367
396
|
std::unique_ptr<impl> pimpl;
|
|
368
397
|
};
|
|
369
398
|
|
|
370
399
|
const char * llm_type_name(llm_type type);
|
|
400
|
+
|
|
401
|
+
// For internal test use
|
|
402
|
+
// TODO: remove
|
|
403
|
+
const std::vector<std::pair<std::string, ggml_tensor *>> & llama_internal_get_tensor_map(const llama_model * model);
|
|
@@ -756,10 +756,19 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
|
|
|
756
756
|
// NOTE: can't use LLM_TN here because the layer number is not known
|
|
757
757
|
quantize &= name.find("ssm_conv1d.weight") == std::string::npos;
|
|
758
758
|
|
|
759
|
-
// do not quantize RWKV's time_mix_first tensors
|
|
759
|
+
// do not quantize RWKV's small yet 2D weights
|
|
760
760
|
quantize &= name.find("time_mix_first.weight") == std::string::npos;
|
|
761
|
+
quantize &= name.find("time_mix_w0.weight") == std::string::npos;
|
|
761
762
|
quantize &= name.find("time_mix_w1.weight") == std::string::npos;
|
|
762
763
|
quantize &= name.find("time_mix_w2.weight") == std::string::npos;
|
|
764
|
+
quantize &= name.find("time_mix_v0.weight") == std::string::npos;
|
|
765
|
+
quantize &= name.find("time_mix_v1.weight") == std::string::npos;
|
|
766
|
+
quantize &= name.find("time_mix_v2.weight") == std::string::npos;
|
|
767
|
+
quantize &= name.find("time_mix_a0.weight") == std::string::npos;
|
|
768
|
+
quantize &= name.find("time_mix_a1.weight") == std::string::npos;
|
|
769
|
+
quantize &= name.find("time_mix_a2.weight") == std::string::npos;
|
|
770
|
+
quantize &= name.find("time_mix_g1.weight") == std::string::npos;
|
|
771
|
+
quantize &= name.find("time_mix_g2.weight") == std::string::npos;
|
|
763
772
|
quantize &= name.find("time_mix_decay_w1.weight") == std::string::npos;
|
|
764
773
|
quantize &= name.find("time_mix_decay_w2.weight") == std::string::npos;
|
|
765
774
|
quantize &= name.find("time_mix_lerp_fused.weight") == std::string::npos;
|