@fugood/llama.node 0.3.13 → 0.3.15
This diff shows the changes between publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +1 -1
- package/package.json +1 -1
- package/src/LlamaContext.cpp +98 -76
- package/src/LlamaContext.h +1 -1
- package/src/common.hpp +1 -2
- package/src/llama.cpp/.github/workflows/build.yml +89 -10
- package/src/llama.cpp/.github/workflows/server.yml +2 -0
- package/src/llama.cpp/CMakeLists.txt +9 -1
- package/src/llama.cpp/cmake/common.cmake +2 -0
- package/src/llama.cpp/common/CMakeLists.txt +3 -3
- package/src/llama.cpp/common/arg.cpp +132 -13
- package/src/llama.cpp/common/chat.cpp +960 -266
- package/src/llama.cpp/common/chat.h +135 -0
- package/src/llama.cpp/common/common.cpp +33 -174
- package/src/llama.cpp/common/common.h +27 -67
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +4 -5
- package/src/llama.cpp/common/json-schema-to-grammar.h +0 -1
- package/src/llama.cpp/common/{minja.hpp → minja/minja.hpp} +37 -5
- package/src/llama.cpp/common/ngram-cache.cpp +1 -0
- package/src/llama.cpp/common/sampling.cpp +45 -7
- package/src/llama.cpp/common/speculative.cpp +10 -9
- package/src/llama.cpp/common/speculative.h +1 -1
- package/src/llama.cpp/docs/build.md +45 -7
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +2 -2
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +4 -2
- package/src/llama.cpp/examples/embedding/embedding.cpp +2 -1
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +4 -2
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +2 -2
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +3 -4
- package/src/llama.cpp/examples/infill/infill.cpp +2 -2
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +2 -2
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +5 -5
- package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
- package/src/llama.cpp/examples/llava/clip.cpp +373 -107
- package/src/llama.cpp/examples/llava/clip.h +19 -3
- package/src/llama.cpp/examples/llava/gemma3-cli.cpp +341 -0
- package/src/llama.cpp/examples/llava/llava.cpp +4 -2
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +30 -11
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +7 -6
- package/src/llama.cpp/examples/lookup/lookup.cpp +1 -1
- package/src/llama.cpp/examples/main/main.cpp +79 -34
- package/src/llama.cpp/examples/parallel/parallel.cpp +6 -5
- package/src/llama.cpp/examples/passkey/passkey.cpp +15 -14
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +6 -6
- package/src/llama.cpp/examples/quantize/quantize.cpp +1 -0
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +2 -2
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +1 -1
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +882 -237
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +35 -26
- package/src/llama.cpp/examples/run/run.cpp +196 -108
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +2 -2
- package/src/llama.cpp/examples/server/server.cpp +113 -101
- package/src/llama.cpp/examples/server/utils.hpp +94 -105
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +2 -2
- package/src/llama.cpp/examples/speculative/speculative.cpp +14 -14
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
- package/src/llama.cpp/examples/sycl/run-llama2.sh +2 -2
- package/src/llama.cpp/examples/tts/tts.cpp +263 -151
- package/src/llama.cpp/ggml/CMakeLists.txt +14 -1
- package/src/llama.cpp/ggml/cmake/common.cmake +26 -0
- package/src/llama.cpp/ggml/include/ggml-alloc.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-backend.h +3 -3
- package/src/llama.cpp/ggml/include/ggml-cpu.h +3 -0
- package/src/llama.cpp/ggml/include/ggml.h +29 -1
- package/src/llama.cpp/ggml/src/CMakeLists.txt +15 -34
- package/src/llama.cpp/ggml/src/ggml-alloc.c +24 -15
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +58 -54
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +10 -8
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +6 -2
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +3 -7
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +3 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +139 -16
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +4 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +151 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1546 -387
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1645 -113
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +22 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +259 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +61 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +288 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +15 -2
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +2 -1
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +3 -1
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +14 -0
- package/src/llama.cpp/ggml/src/ggml-impl.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -5
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +242 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +6 -6
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +1 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +315 -138
- package/src/llama.cpp/ggml/src/ggml-quants.c +114 -114
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +5 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +17 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +117 -36
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +33 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +2 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +701 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +55 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +147 -16
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +40 -40
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +307 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +262 -746
- package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +0 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +75 -78
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +114 -6
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +6 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +4 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +13 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +305 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv.hpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +498 -188
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +0 -4
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +16 -3
- package/src/llama.cpp/ggml/src/ggml.c +93 -5
- package/src/llama.cpp/include/llama.h +105 -27
- package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +46 -0
- package/src/llama.cpp/requirements/requirements-all.txt +1 -0
- package/src/llama.cpp/requirements/requirements-tool_bench.txt +12 -0
- package/src/llama.cpp/requirements.txt +1 -0
- package/src/llama.cpp/src/CMakeLists.txt +5 -2
- package/src/llama.cpp/src/llama-adapter.cpp +19 -20
- package/src/llama.cpp/src/llama-adapter.h +11 -9
- package/src/llama.cpp/src/llama-arch.cpp +123 -16
- package/src/llama.cpp/src/llama-arch.h +19 -0
- package/src/llama.cpp/src/llama-batch.h +2 -2
- package/src/llama.cpp/src/llama-chat.cpp +1 -0
- package/src/llama.cpp/src/llama-context.cpp +2253 -1222
- package/src/llama.cpp/src/llama-context.h +214 -77
- package/src/llama.cpp/src/llama-cparams.h +1 -0
- package/src/llama.cpp/src/llama-grammar.cpp +182 -182
- package/src/llama.cpp/src/llama-grammar.h +12 -3
- package/src/llama.cpp/src/llama-graph.cpp +1662 -0
- package/src/llama.cpp/src/llama-graph.h +574 -0
- package/src/llama.cpp/src/llama-hparams.cpp +8 -0
- package/src/llama.cpp/src/llama-hparams.h +9 -0
- package/src/llama.cpp/src/llama-io.cpp +15 -0
- package/src/llama.cpp/src/llama-io.h +35 -0
- package/src/llama.cpp/src/llama-kv-cache.cpp +1006 -291
- package/src/llama.cpp/src/llama-kv-cache.h +178 -109
- package/src/llama.cpp/src/llama-memory.cpp +1 -0
- package/src/llama.cpp/src/llama-memory.h +21 -0
- package/src/llama.cpp/src/llama-mmap.cpp +11 -1
- package/src/llama.cpp/src/llama-model.cpp +8230 -122
- package/src/llama.cpp/src/llama-model.h +34 -1
- package/src/llama.cpp/src/llama-quant.cpp +10 -1
- package/src/llama.cpp/src/llama-sampling.cpp +43 -10
- package/src/llama.cpp/src/llama-vocab.cpp +12 -0
- package/src/llama.cpp/src/llama.cpp +51 -9837
- package/src/llama.cpp/tests/test-backend-ops.cpp +247 -112
- package/src/llama.cpp/tests/test-chat-template.cpp +32 -22
- package/src/llama.cpp/tests/test-chat.cpp +593 -395
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +63 -63
- package/src/llama.cpp/tests/test-quantize-fns.cpp +1 -9
- package/src/llama.cpp/Sources/llama/llama.h +0 -4
- package/src/llama.cpp/common/chat.hpp +0 -55
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +0 -143
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +0 -9
- package/src/llama.cpp/common/{chat-template.hpp → minja/chat-template.hpp} +0 -0
package/src/llama.cpp/tests/test-chat-template.cpp

@@ -1,13 +1,14 @@
 #include <string>
 #include <vector>
 #include <sstream>
+#include <regex>
 
 #undef NDEBUG
 #include <cassert>
 
 #include "llama.h"
 #include "common.h"
-#include "chat
+#include "chat.h"
 
 static std::string normalize_newlines(const std::string & s) {
 #ifdef _WIN32
@@ -18,6 +19,13 @@ static std::string normalize_newlines(const std::string & s) {
 #endif
 }
 
+static common_chat_msg simple_msg(const std::string & role, const std::string & content) {
+    common_chat_msg msg;
+    msg.role = role;
+    msg.content = content;
+    return msg;
+}
+
 int main(void) {
     std::vector<llama_chat_message> conversation {
         {"system", "You are a helpful assistant"},
@@ -50,7 +58,7 @@ int main(void) {
             /* .template_str= */ "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
             /* .expected_output= */ "[INST] You are a helpful assistant\nHello [/INST]Hi there</s>[INST] Who are you [/INST] I am an assistant </s>[INST] Another question [/INST]",
             /* .expected_output_jinja= */ "",
-            /* .bos_token= */ "",
+            /* .bos_token= */ "<s>",
             /* .eos_token= */ "</s>",
         },
         {
@@ -72,8 +80,8 @@ int main(void) {
         {
             /* .name= */ "mlabonne/AlphaMonarch-7B",
             /* .template_str= */ "{% for message in messages %}{{bos_token + message['role'] + '\\n' + message['content'] + eos_token + '\\n'}}{% endfor %}{% if add_generation_prompt %}{{ bos_token + 'assistant\\n' }}{% endif %}",
-            /* .expected_output= */
-            /* .expected_output_jinja= */ "
+            /* .expected_output= */ "system\nYou are a helpful assistant</s>\n<s>user\nHello</s>\n<s>assistant\nHi there</s>\n<s>user\nWho are you</s>\n<s>assistant\n I am an assistant </s>\n<s>user\nAnother question</s>\n<s>assistant\n",
+            /* .expected_output_jinja= */ "",
             /* .bos_token= */ "<s>",
             /* .eos_token= */ "</s>",
         },
@@ -87,7 +95,7 @@ int main(void) {
             /* .name= */ "OrionStarAI/Orion-14B-Chat",
             /* .template_str= */ "{% for message in messages %}{% if loop.first %}{{ bos_token }}{% endif %}{% if message['role'] == 'user' %}{{ 'Human: ' + message['content'] + '\\n\\nAssistant: ' + eos_token }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}{% endif %}{% endfor %}",
             /* .expected_output= */ "Human: You are a helpful assistant\n\nHello\n\nAssistant: </s>Hi there</s>Human: Who are you\n\nAssistant: </s> I am an assistant </s>Human: Another question\n\nAssistant: </s>",
-            /* .expected_output_jinja= */ "Human: You are a helpful assistant\nHello\n\nAssistant: </s>Hi there</s>Human: Who are you\n\nAssistant: </s> I am an assistant </s>Human: Another question\n\nAssistant:",
+            /* .expected_output_jinja= */ "Human: You are a helpful assistant\nHello\n\nAssistant: </s>Hi there</s>Human: Who are you\n\nAssistant: </s> I am an assistant </s>Human: Another question\n\nAssistant: ",
             /* .bos_token= */ "",
             /* .eos_token= */ "</s>",
         },
@@ -304,12 +312,9 @@ int main(void) {
         }
     }
 
-
+    std::vector<common_chat_msg> messages;
     for (const auto & msg : conversation) {
-        messages.push_back(
-            {"role", msg.role},
-            {"content", msg.content},
-        });
+        messages.push_back(simple_msg(msg.role, msg.content));
     }
     for (const auto & test_case : test_cases) {
         if (!test_case.supported_with_jinja) {
@@ -317,8 +322,13 @@ int main(void) {
         }
         printf("\n\n=== %s (jinja) ===\n\n", test_case.name.c_str());
         try {
-
-
+            auto tmpls = common_chat_templates_init(/* model= */ nullptr, test_case.template_str.c_str(), test_case.bos_token, test_case.eos_token);
+            common_chat_templates_inputs inputs;
+            inputs.use_jinja = true;
+            inputs.messages = messages;
+            inputs.add_generation_prompt = add_generation_prompt;
+            auto output = common_chat_templates_apply(tmpls.get(), inputs).prompt;
+            output = normalize_newlines(output);
             auto expected_output = normalize_newlines(test_case.expected_output_jinja.empty() ? test_case.expected_output : test_case.expected_output_jinja);
             if (output != expected_output) {
                 printf("Expected:\n%s\n", expected_output.c_str());
@@ -336,11 +346,11 @@ int main(void) {
     // test llama_chat_format_single for system message
     printf("\n\n=== llama_chat_format_single (system message) ===\n\n");
     std::vector<common_chat_msg> chat2;
-
+    auto sys_msg = simple_msg("system", "You are a helpful assistant");
 
     auto fmt_sys = [&](std::string tmpl_str) {
-
-        auto output = common_chat_format_single(
+        auto tmpls = common_chat_templates_init(/* model= */ nullptr, tmpl_str);
+        auto output = common_chat_format_single(tmpls.get(), chat2, sys_msg, false, /* use_jinja= */ false);
         printf("fmt_sys(%s) : %s\n", tmpl_str.c_str(), output.c_str());
         printf("-------------------------\n");
         return output;
@@ -360,14 +370,14 @@ int main(void) {
 
     // test llama_chat_format_single for user message
     printf("\n\n=== llama_chat_format_single (user message) ===\n\n");
-    chat2.push_back(
-    chat2.push_back(
-    chat2.push_back(
-
+    chat2.push_back(simple_msg("system", "You are a helpful assistant"));
+    chat2.push_back(simple_msg("user", "Hello"));
+    chat2.push_back(simple_msg("assistant", "I am assistant"));
+    auto new_msg = simple_msg("user", "How are you");
 
-    auto fmt_single = [&](std::string tmpl_str) {
-
-        auto output = common_chat_format_single(
+    auto fmt_single = [&](const std::string & tmpl_str) {
+        auto tmpls = common_chat_templates_init(/* model= */ nullptr, tmpl_str.c_str());
+        auto output = common_chat_format_single(tmpls.get(), chat2, new_msg, true, /* use_jinja= */ false);
         printf("fmt_single(%s) : %s\n", tmpl_str.c_str(), output.c_str());
         printf("-------------------------\n");
         return output;