@fugood/llama.node 0.3.1 → 0.3.3
This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
- package/CMakeLists.txt +1 -8
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/package.json +4 -2
- package/src/DetokenizeWorker.cpp +1 -1
- package/src/EmbeddingWorker.cpp +2 -2
- package/src/LlamaCompletionWorker.cpp +10 -10
- package/src/LlamaCompletionWorker.h +2 -2
- package/src/LlamaContext.cpp +14 -17
- package/src/TokenizeWorker.cpp +1 -1
- package/src/common.hpp +5 -4
- package/src/llama.cpp/.github/workflows/build.yml +137 -29
- package/src/llama.cpp/.github/workflows/close-issue.yml +5 -0
- package/src/llama.cpp/.github/workflows/docker.yml +46 -34
- package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +7 -0
- package/src/llama.cpp/.github/workflows/nix-ci.yml +7 -0
- package/src/llama.cpp/.github/workflows/python-check-requirements.yml +2 -4
- package/src/llama.cpp/.github/workflows/python-type-check.yml +3 -1
- package/src/llama.cpp/.github/workflows/server.yml +7 -0
- package/src/llama.cpp/CMakeLists.txt +26 -11
- package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
- package/src/llama.cpp/common/CMakeLists.txt +10 -10
- package/src/llama.cpp/common/arg.cpp +2041 -0
- package/src/llama.cpp/common/arg.h +77 -0
- package/src/llama.cpp/common/common.cpp +523 -1861
- package/src/llama.cpp/common/common.h +234 -106
- package/src/llama.cpp/common/console.cpp +3 -0
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
- package/src/llama.cpp/common/log.cpp +401 -0
- package/src/llama.cpp/common/log.h +66 -698
- package/src/llama.cpp/common/ngram-cache.cpp +39 -36
- package/src/llama.cpp/common/ngram-cache.h +19 -19
- package/src/llama.cpp/common/sampling.cpp +356 -350
- package/src/llama.cpp/common/sampling.h +62 -139
- package/src/llama.cpp/common/stb_image.h +5990 -6398
- package/src/llama.cpp/docs/build.md +72 -17
- package/src/llama.cpp/examples/CMakeLists.txt +1 -2
- package/src/llama.cpp/examples/batched/batched.cpp +49 -65
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +42 -53
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +55 -52
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +22 -22
- package/src/llama.cpp/examples/cvector-generator/pca.hpp +3 -13
- package/src/llama.cpp/examples/embedding/embedding.cpp +147 -91
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +37 -37
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +39 -38
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +14 -39
- package/src/llama.cpp/examples/{baby-llama → gen-docs}/CMakeLists.txt +2 -2
- package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +83 -0
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +58 -39
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +46 -39
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +75 -69
- package/src/llama.cpp/examples/infill/infill.cpp +131 -192
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +276 -178
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +40 -36
- package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
- package/src/llama.cpp/examples/llava/clip.cpp +686 -150
- package/src/llama.cpp/examples/llava/clip.h +11 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +60 -71
- package/src/llama.cpp/examples/llava/llava.cpp +146 -26
- package/src/llama.cpp/examples/llava/llava.h +2 -3
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +323 -0
- package/src/llama.cpp/examples/llava/requirements.txt +1 -0
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +55 -56
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +15 -13
- package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +34 -33
- package/src/llama.cpp/examples/lookup/lookup.cpp +60 -63
- package/src/llama.cpp/examples/main/main.cpp +216 -313
- package/src/llama.cpp/examples/parallel/parallel.cpp +58 -59
- package/src/llama.cpp/examples/passkey/passkey.cpp +53 -61
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +277 -311
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/quantize.cpp +27 -9
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +12 -12
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +57 -52
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +27 -2
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +60 -46
- package/src/llama.cpp/examples/server/CMakeLists.txt +7 -18
- package/src/llama.cpp/examples/server/server.cpp +1347 -1531
- package/src/llama.cpp/examples/server/tests/requirements.txt +2 -1
- package/src/llama.cpp/examples/server/utils.hpp +396 -107
- package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/simple/simple.cpp +132 -106
- package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +197 -0
- package/src/llama.cpp/examples/speculative/speculative.cpp +153 -124
- package/src/llama.cpp/examples/sycl/run-llama2.sh +10 -19
- package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +27 -29
- package/src/llama.cpp/ggml/CMakeLists.txt +29 -12
- package/src/llama.cpp/ggml/include/ggml-alloc.h +3 -3
- package/src/llama.cpp/ggml/include/ggml-amx.h +25 -0
- package/src/llama.cpp/ggml/include/ggml-backend.h +166 -68
- package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
- package/src/llama.cpp/ggml/include/ggml-cann.h +17 -19
- package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
- package/src/llama.cpp/ggml/include/ggml-cpu.h +177 -0
- package/src/llama.cpp/ggml/include/ggml-cuda.h +17 -17
- package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
- package/src/llama.cpp/ggml/include/ggml-metal.h +13 -12
- package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
- package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
- package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
- package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
- package/src/llama.cpp/ggml/include/ggml.h +272 -505
- package/src/llama.cpp/ggml/src/CMakeLists.txt +69 -1110
- package/src/llama.cpp/ggml/src/ggml-aarch64.c +52 -2116
- package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -20
- package/src/llama.cpp/ggml/src/ggml-alloc.c +29 -27
- package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +107 -0
- package/src/llama.cpp/ggml/src/ggml-amx/common.h +94 -0
- package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
- package/src/llama.cpp/ggml/src/ggml-amx/mmq.cpp +2510 -0
- package/src/llama.cpp/ggml/src/ggml-amx/mmq.h +17 -0
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +144 -81
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +195 -0
- package/src/llama.cpp/ggml/src/{ggml-backend.c → ggml-backend.cpp} +394 -635
- package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +91 -0
- package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +217 -70
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +46 -0
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +4 -27
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +32 -4
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +179 -41
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +1 -0
- package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +458 -353
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -1
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +2 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +278 -0
- package/src/llama.cpp/ggml/src/ggml-common.h +20 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +261 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.c +3560 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +30 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +371 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10822 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +13970 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +663 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1885 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +155 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +178 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +134 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +106 -0
- package/src/llama.cpp/ggml/src/ggml-impl.h +380 -584
- package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +162 -0
- package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +233 -87
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +108 -0
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +249 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +100 -0
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +867 -0
- package/src/llama.cpp/ggml/src/ggml-quants.c +369 -9994
- package/src/llama.cpp/ggml/src/ggml-quants.h +78 -110
- package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +11 -0
- package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +560 -335
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +81 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +6 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +51 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +310 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +99 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +21 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +57 -57
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +106 -106
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +18 -25
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1011 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +101 -0
- package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3350 -3980
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +125 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +70 -68
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +9 -6
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +8 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +71 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +21 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +138 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
- package/src/llama.cpp/ggml/src/ggml-threading.h +12 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +78 -0
- package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +2034 -1718
- package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +2 -0
- package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/vulkan-shaders-gen.cpp +152 -185
- package/src/llama.cpp/ggml/src/ggml.c +2075 -16579
- package/src/llama.cpp/include/llama.h +296 -285
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +46 -0
- package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
- package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
- package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +1 -1
- package/src/llama.cpp/src/CMakeLists.txt +2 -1
- package/src/llama.cpp/src/llama-grammar.cpp +721 -122
- package/src/llama.cpp/src/llama-grammar.h +120 -15
- package/src/llama.cpp/src/llama-impl.h +156 -1
- package/src/llama.cpp/src/llama-sampling.cpp +2058 -346
- package/src/llama.cpp/src/llama-sampling.h +39 -47
- package/src/llama.cpp/src/llama-vocab.cpp +390 -127
- package/src/llama.cpp/src/llama-vocab.h +60 -20
- package/src/llama.cpp/src/llama.cpp +6215 -3263
- package/src/llama.cpp/src/unicode-data.cpp +6 -4
- package/src/llama.cpp/src/unicode-data.h +4 -4
- package/src/llama.cpp/src/unicode.cpp +15 -7
- package/src/llama.cpp/tests/CMakeLists.txt +4 -2
- package/src/llama.cpp/tests/test-arg-parser.cpp +131 -0
- package/src/llama.cpp/tests/test-backend-ops.cpp +1725 -297
- package/src/llama.cpp/tests/test-barrier.cpp +94 -0
- package/src/llama.cpp/tests/test-chat-template.cpp +9 -5
- package/src/llama.cpp/tests/test-grammar-integration.cpp +23 -38
- package/src/llama.cpp/tests/test-grammar-parser.cpp +6 -4
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +23 -8
- package/src/llama.cpp/tests/test-llama-grammar.cpp +9 -8
- package/src/llama.cpp/tests/test-log.cpp +39 -0
- package/src/llama.cpp/tests/test-opt.cpp +853 -142
- package/src/llama.cpp/tests/test-quantize-fns.cpp +28 -19
- package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
- package/src/llama.cpp/tests/test-rope.cpp +2 -1
- package/src/llama.cpp/tests/test-sampling.cpp +226 -142
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +56 -36
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
- package/patches/llama.patch +0 -22
- package/src/llama.cpp/.github/workflows/bench.yml +0 -310
- package/src/llama.cpp/common/grammar-parser.cpp +0 -536
- package/src/llama.cpp/common/grammar-parser.h +0 -29
- package/src/llama.cpp/common/train.cpp +0 -1513
- package/src/llama.cpp/common/train.h +0 -233
- package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1640
- package/src/llama.cpp/examples/benchmark/CMakeLists.txt +0 -6
- package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -275
- package/src/llama.cpp/ggml/src/llamafile/sgemm.cpp +0 -1027
- package/src/llama.cpp/tests/test-grad0.cpp +0 -1566
- package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
- package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0

package/src/llama.cpp/examples/save-load-state/save-load-state.cpp

@@ -1,17 +1,17 @@
+#include "arg.h"
 #include "common.h"
 #include "llama.h"
 
 #include <vector>
 #include <cstdio>
-#include <chrono>
 
 int main(int argc, char ** argv) {
-
+    common_params params;
 
     params.prompt = "The quick brown fox";
+    params.sparams.seed = 1234;
 
-    if (!
-        gpt_params_print_usage(argc, argv, params);
+    if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_COMMON)) {
         return 1;
     }
 
@@ -28,21 +28,35 @@ int main(int argc, char ** argv) {
     std::string result2;
 
     // init
-
-
+    common_init_result llama_init = common_init_from_params(params);
+
+    llama_model * model = llama_init.model;
+    llama_context * ctx = llama_init.context;
 
-    std::tie(model, ctx) = llama_init_from_gpt_params(params);
     if (model == nullptr || ctx == nullptr) {
         fprintf(stderr, "%s : failed to init\n", __func__);
         return 1;
     }
 
+    auto sparams = llama_sampler_chain_default_params();
+
+    llama_sampler * smpl = llama_sampler_chain_init(sparams);
+
+    llama_sampler_chain_add(smpl, llama_sampler_init_dist(params.sparams.seed));
+
     // tokenize prompt
-    auto tokens =
+    auto tokens = common_tokenize(ctx, params.prompt, true);
+
+    // prepare the batch
+    llama_batch batch = llama_batch_init(tokens.size(), 0, 1);
+    for (size_t i = 0; i < tokens.size(); i++) {
+        common_batch_add(batch, tokens[i], i, {0}, false);
+    }
+    batch.logits[batch.n_tokens - 1] = true; // generate next token
 
     // evaluate prompt
-    llama_decode(ctx,
-    n_past +=
+    llama_decode(ctx, batch);
+    n_past += batch.n_tokens;
 
     // save state (rng, logits, embedding and kv_cache) to file
     {
@@ -63,23 +77,18 @@ int main(int argc, char ** argv) {
     printf("\nfirst run: %s", params.prompt.c_str());
 
     for (auto i = 0; i < params.n_predict; i++) {
-        auto
-        auto
-
-        std::vector<llama_token_data> candidates;
-        candidates.reserve(n_vocab);
-        for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
-            candidates.emplace_back(llama_token_data{token_id, logits[token_id], 0.0f});
-        }
-        llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
-        auto next_token = llama_sample_token(ctx, &candidates_p);
-        auto next_token_str = llama_token_to_piece(ctx, next_token);
+        auto next_token = llama_sampler_sample(smpl, ctx, -1);
+        auto next_token_str = common_token_to_piece(ctx, next_token);
 
         printf("%s", next_token_str.c_str());
         result0 += next_token_str;
 
-
+        common_batch_clear(batch);
+        common_batch_add(batch, next_token, n_past, {0}, true);
+
+        if (llama_decode(ctx, batch)) {
             fprintf(stderr, "\n%s : failed to evaluate\n", __func__);
+            llama_batch_free(batch);
             llama_free(ctx);
             llama_free_model(model);
             return 1;
@@ -93,7 +102,11 @@ int main(int argc, char ** argv) {
     llama_free(ctx);
 
     // make new context
-    auto * ctx2 = llama_new_context_with_model(model,
+    auto * ctx2 = llama_new_context_with_model(model, common_context_params_to_llama(params));
+
+    llama_sampler * smpl2 = llama_sampler_chain_init(sparams);
+
+    llama_sampler_chain_add(smpl2, llama_sampler_init_dist(params.sparams.seed));
 
     printf("\nsecond run: %s", params.prompt.c_str());
 
@@ -123,22 +136,18 @@ int main(int argc, char ** argv) {
 
     // second run
     for (auto i = 0; i < params.n_predict; i++) {
-        auto
-        auto
-        std::vector<llama_token_data> candidates;
-        candidates.reserve(n_vocab);
-        for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
-            candidates.emplace_back(llama_token_data{token_id, logits[token_id], 0.0f});
-        }
-        llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
-        auto next_token = llama_sample_token(ctx2, &candidates_p);
-        auto next_token_str = llama_token_to_piece(ctx2, next_token);
+        auto next_token = llama_sampler_sample(smpl2, ctx2, -1);
+        auto next_token_str = common_token_to_piece(ctx2, next_token);
 
         printf("%s", next_token_str.c_str());
         result1 += next_token_str;
 
-
+        common_batch_clear(batch);
+        common_batch_add(batch, next_token, n_past, {0}, true);
+
+        if (llama_decode(ctx2, batch)) {
             fprintf(stderr, "\n%s : failed to evaluate\n", __func__);
+            llama_batch_free(batch);
             llama_free(ctx2);
             llama_free_model(model);
             return 1;
@@ -156,7 +165,11 @@ int main(int argc, char ** argv) {
     }
 
     // make new context
-    auto* ctx3 = llama_new_context_with_model(model,
+    auto * ctx3 = llama_new_context_with_model(model, common_context_params_to_llama(params));
+
+    llama_sampler * smpl3 = llama_sampler_chain_init(sparams);
+
+    llama_sampler_chain_add(smpl3, llama_sampler_init_dist(params.sparams.seed));
 
     printf("\nsingle seq run: %s", params.prompt.c_str());
 
@@ -214,22 +227,18 @@ int main(int argc, char ** argv) {
 
     // third run with seq 1 instead of 0
     for (auto i = 0; i < params.n_predict; i++) {
-        auto
-        auto
-        std::vector<llama_token_data> candidates;
-        candidates.reserve(n_vocab);
-        for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
-            candidates.emplace_back(llama_token_data{token_id, logits[token_id], 0.0f});
-        }
-        llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
-        auto next_token = llama_sample_token(ctx3, &candidates_p);
-        auto next_token_str = llama_token_to_piece(ctx3, next_token);
+        auto next_token = llama_sampler_sample(smpl3, ctx3, -1);
+        auto next_token_str = common_token_to_piece(ctx3, next_token);
 
         printf("%s", next_token_str.c_str());
         result2 += next_token_str;
 
-
+        common_batch_clear(batch);
+        common_batch_add(batch, next_token, n_past, {1}, true);
+
+        if (llama_decode(ctx3, batch)) {
             fprintf(stderr, "\n%s : failed to evaluate\n", __func__);
+            llama_batch_free(batch);
             llama_free(ctx3);
             llama_free_model(model);
             return 1;
@@ -239,6 +248,11 @@ int main(int argc, char ** argv) {
 
     printf("\n");
 
+    llama_sampler_free(smpl);
+    llama_sampler_free(smpl2);
+    llama_sampler_free(smpl3);
+
+    llama_batch_free(batch);
     llama_free(ctx3);
     llama_free_model(model);
 
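Read together, the save-load-state hunks capture the llama.cpp API migration this release pulls in: the hand-rolled llama_token_data candidate loop and llama_sample_token are replaced by a llama_sampler chain, and ad-hoc decode calls are replaced by an explicit, reusable llama_batch. Below is a minimal sketch of that new flow, assembled only from calls visible in the hunks above; the state saving/loading and the second and third runs are omitted, so treat it as an illustration of the API shape at this revision rather than a copy of the file.

#include "arg.h"
#include "common.h"
#include "llama.h"

#include <cstdio>
#include <vector>

int main(int argc, char ** argv) {
    common_params params;
    params.prompt       = "The quick brown fox";
    params.sparams.seed = 1234;

    if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_COMMON)) {
        return 1;
    }

    // model and context now come back together from one helper
    common_init_result llama_init = common_init_from_params(params);
    llama_model   * model = llama_init.model;
    llama_context * ctx   = llama_init.context;
    if (model == nullptr || ctx == nullptr) {
        return 1;
    }

    // sampling is an explicit object: a chain that owns its own seeded RNG
    llama_sampler * smpl = llama_sampler_chain_init(llama_sampler_chain_default_params());
    llama_sampler_chain_add(smpl, llama_sampler_init_dist(params.sparams.seed));

    // the prompt is fed through a reusable batch instead of a one-off call
    std::vector<llama_token> tokens = common_tokenize(ctx, params.prompt, true);
    llama_batch batch = llama_batch_init(tokens.size(), 0, 1);
    for (size_t i = 0; i < tokens.size(); i++) {
        common_batch_add(batch, tokens[i], i, {0}, false);
    }
    batch.logits[batch.n_tokens - 1] = true; // request logits for the last token only
    llama_decode(ctx, batch);

    int n_past = batch.n_tokens;
    for (int i = 0; i < params.n_predict; i++) {
        // sample from the last decoded logits, then feed the token back in
        llama_token next_token = llama_sampler_sample(smpl, ctx, -1);
        printf("%s", common_token_to_piece(ctx, next_token).c_str());

        common_batch_clear(batch);
        common_batch_add(batch, next_token, n_past, {0}, true);
        n_past += 1;
        if (llama_decode(ctx, batch)) {
            break;
        }
    }
    printf("\n");

    llama_batch_free(batch);
    llama_sampler_free(smpl);
    llama_free(ctx);
    llama_free_model(model);
    return 0;
}

Because each chain carries its own RNG seeded via llama_sampler_init_dist, sampling state now lives in the sampler object rather than the context, which is presumably why the diff creates a fresh chain per context (smpl, smpl2, smpl3) and frees all three at the end.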
package/src/llama.cpp/examples/server/CMakeLists.txt

@@ -1,6 +1,6 @@
 set(TARGET llama-server)
-
-option(LLAMA_SERVER_SSL
+
+option(LLAMA_SERVER_SSL "Build SSL support for the server" OFF)
 
 include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
 
@@ -15,21 +15,13 @@ set(TARGET_SRCS
     httplib.h
 )
 set(PUBLIC_ASSETS
-    colorthemes.css
-    style.css
-    theme-beeninorder.css
-    theme-ketivah.css
-    theme-mangotango.css
-    theme-playground.css
-    theme-polarnight.css
-    theme-snowstorm.css
     index.html
-    index-new.html
-    index.js
     completion.js
-
-
-
+    loading.html
+    deps_daisyui.min.css
+    deps_markdown-it.js
+    deps_tailwindcss.js
+    deps_vue.esm-browser.js
 )
 
 foreach(asset ${PUBLIC_ASSETS})
@@ -45,9 +37,6 @@ endforeach()
 
 add_executable(${TARGET} ${TARGET_SRCS})
 install(TARGETS ${TARGET} RUNTIME)
-target_compile_definitions(${TARGET} PRIVATE
-    SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>
-)
 
 target_link_libraries(${TARGET} PRIVATE common ${CMAKE_THREAD_LIBS_INIT})