@fugood/llama.node 0.3.1 → 0.3.2
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
- package/CMakeLists.txt +0 -9
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/package.json +4 -2
- package/src/LlamaCompletionWorker.cpp +6 -6
- package/src/LlamaContext.cpp +7 -9
- package/src/common.hpp +2 -1
- package/src/llama.cpp/.github/workflows/build.yml +98 -24
- package/src/llama.cpp/.github/workflows/close-issue.yml +5 -0
- package/src/llama.cpp/.github/workflows/docker.yml +43 -34
- package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +7 -0
- package/src/llama.cpp/.github/workflows/nix-ci.yml +7 -0
- package/src/llama.cpp/.github/workflows/python-check-requirements.yml +2 -4
- package/src/llama.cpp/.github/workflows/python-type-check.yml +3 -1
- package/src/llama.cpp/.github/workflows/server.yml +7 -0
- package/src/llama.cpp/CMakeLists.txt +20 -8
- package/src/llama.cpp/common/CMakeLists.txt +12 -10
- package/src/llama.cpp/common/arg.cpp +2006 -0
- package/src/llama.cpp/common/arg.h +77 -0
- package/src/llama.cpp/common/common.cpp +496 -1632
- package/src/llama.cpp/common/common.h +161 -63
- package/src/llama.cpp/common/console.cpp +3 -0
- package/src/llama.cpp/common/log.cpp +401 -0
- package/src/llama.cpp/common/log.h +66 -698
- package/src/llama.cpp/common/ngram-cache.cpp +3 -0
- package/src/llama.cpp/common/sampling.cpp +348 -350
- package/src/llama.cpp/common/sampling.h +62 -139
- package/src/llama.cpp/common/stb_image.h +5990 -6398
- package/src/llama.cpp/common/train.cpp +2 -0
- package/src/llama.cpp/docs/build.md +36 -1
- package/src/llama.cpp/examples/CMakeLists.txt +0 -1
- package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +1 -2
- package/src/llama.cpp/examples/batched/batched.cpp +39 -55
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +34 -44
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +55 -52
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +15 -15
- package/src/llama.cpp/examples/cvector-generator/pca.hpp +3 -13
- package/src/llama.cpp/examples/embedding/embedding.cpp +143 -87
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +33 -33
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +36 -35
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +14 -39
- package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +83 -0
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +58 -39
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +34 -27
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +59 -62
- package/src/llama.cpp/examples/infill/infill.cpp +117 -132
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +265 -58
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +29 -22
- package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
- package/src/llama.cpp/examples/llava/clip.cpp +685 -150
- package/src/llama.cpp/examples/llava/clip.h +11 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +47 -58
- package/src/llama.cpp/examples/llava/llava.cpp +110 -24
- package/src/llama.cpp/examples/llava/llava.h +2 -3
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +323 -0
- package/src/llama.cpp/examples/llava/requirements.txt +1 -0
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +42 -43
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +10 -8
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +23 -22
- package/src/llama.cpp/examples/lookup/lookup.cpp +40 -43
- package/src/llama.cpp/examples/main/main.cpp +210 -262
- package/src/llama.cpp/examples/parallel/parallel.cpp +49 -49
- package/src/llama.cpp/examples/passkey/passkey.cpp +42 -50
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +187 -200
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/quantize.cpp +27 -9
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +2 -3
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +49 -44
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +24 -1
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +32 -35
- package/src/llama.cpp/examples/server/CMakeLists.txt +3 -5
- package/src/llama.cpp/examples/server/server.cpp +1027 -1073
- package/src/llama.cpp/examples/server/tests/requirements.txt +2 -1
- package/src/llama.cpp/examples/server/utils.hpp +107 -105
- package/src/llama.cpp/examples/simple/simple.cpp +35 -41
- package/src/llama.cpp/examples/speculative/speculative.cpp +129 -103
- package/src/llama.cpp/examples/sycl/run-llama2.sh +10 -19
- package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +25 -27
- package/src/llama.cpp/ggml/CMakeLists.txt +14 -3
- package/src/llama.cpp/ggml/include/ggml-alloc.h +3 -3
- package/src/llama.cpp/ggml/include/ggml-backend.h +145 -60
- package/src/llama.cpp/ggml/include/ggml-blas.h +3 -3
- package/src/llama.cpp/ggml/include/ggml-cann.h +15 -19
- package/src/llama.cpp/ggml/include/ggml-cuda.h +16 -16
- package/src/llama.cpp/ggml/include/ggml-metal.h +5 -8
- package/src/llama.cpp/ggml/include/ggml-rpc.h +5 -5
- package/src/llama.cpp/ggml/include/ggml-sycl.h +8 -8
- package/src/llama.cpp/ggml/include/ggml-vulkan.h +7 -7
- package/src/llama.cpp/ggml/include/ggml.h +293 -186
- package/src/llama.cpp/ggml/src/CMakeLists.txt +86 -44
- package/src/llama.cpp/ggml/src/ggml-aarch64.c +2135 -1119
- package/src/llama.cpp/ggml/src/ggml-alloc.c +6 -0
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +152 -70
- package/src/llama.cpp/ggml/src/{ggml-backend.c → ggml-backend.cpp} +606 -286
- package/src/llama.cpp/ggml/src/ggml-blas.cpp +9 -10
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +4 -27
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +32 -4
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +179 -41
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -1
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +2 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +278 -0
- package/src/llama.cpp/ggml/src/ggml-cann.cpp +215 -216
- package/src/llama.cpp/ggml/src/ggml-common.h +20 -0
- package/src/llama.cpp/ggml/src/ggml-cpu-impl.h +614 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +178 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +134 -0
- package/src/llama.cpp/ggml/src/ggml-impl.h +49 -603
- package/src/llama.cpp/ggml/src/ggml-kompute.cpp +4 -24
- package/src/llama.cpp/ggml/src/ggml-quants.c +972 -92
- package/src/llama.cpp/ggml/src/ggml-quants.h +15 -0
- package/src/llama.cpp/ggml/src/ggml-rpc.cpp +116 -66
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +3 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +52 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +99 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +21 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +57 -57
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +106 -106
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +16 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +101 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +125 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +6 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +2 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +71 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +21 -0
- package/src/llama.cpp/ggml/src/ggml-sycl.cpp +97 -169
- package/src/llama.cpp/ggml/src/ggml-vulkan.cpp +1508 -1124
- package/src/llama.cpp/ggml/src/ggml.c +3001 -1647
- package/src/llama.cpp/ggml/src/llamafile/sgemm.cpp +192 -0
- package/src/llama.cpp/ggml/src/vulkan-shaders/CMakeLists.txt +2 -0
- package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +88 -40
- package/src/llama.cpp/include/llama.h +241 -264
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +46 -0
- package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +1 -1
- package/src/llama.cpp/src/llama-grammar.cpp +721 -122
- package/src/llama.cpp/src/llama-grammar.h +120 -15
- package/src/llama.cpp/src/llama-impl.h +156 -1
- package/src/llama.cpp/src/llama-sampling.cpp +1375 -303
- package/src/llama.cpp/src/llama-sampling.h +20 -47
- package/src/llama.cpp/src/llama-vocab.cpp +343 -120
- package/src/llama.cpp/src/llama-vocab.h +33 -17
- package/src/llama.cpp/src/llama.cpp +4247 -1525
- package/src/llama.cpp/src/unicode-data.cpp +6 -4
- package/src/llama.cpp/src/unicode-data.h +4 -4
- package/src/llama.cpp/src/unicode.cpp +15 -7
- package/src/llama.cpp/tests/CMakeLists.txt +3 -0
- package/src/llama.cpp/tests/test-arg-parser.cpp +131 -0
- package/src/llama.cpp/tests/test-backend-ops.cpp +1592 -289
- package/src/llama.cpp/tests/test-barrier.cpp +93 -0
- package/src/llama.cpp/tests/test-grad0.cpp +187 -70
- package/src/llama.cpp/tests/test-grammar-integration.cpp +23 -38
- package/src/llama.cpp/tests/test-grammar-parser.cpp +6 -4
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +6 -4
- package/src/llama.cpp/tests/test-llama-grammar.cpp +9 -8
- package/src/llama.cpp/tests/test-log.cpp +39 -0
- package/src/llama.cpp/tests/test-quantize-fns.cpp +6 -0
- package/src/llama.cpp/tests/test-rope.cpp +1 -1
- package/src/llama.cpp/tests/test-sampling.cpp +157 -98
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +55 -35
- package/patches/llama.patch +0 -22
- package/src/llama.cpp/.github/workflows/bench.yml +0 -310
- package/src/llama.cpp/common/grammar-parser.cpp +0 -536
- package/src/llama.cpp/common/grammar-parser.h +0 -29
- package/src/llama.cpp/examples/benchmark/CMakeLists.txt +0 -6
- package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -275

package/src/llama.cpp/examples/lookup/lookup.cpp

@@ -1,44 +1,40 @@
+#include "arg.h"
 #include "ggml.h"
-#include "llama.h"
 #include "common.h"
 #include "ngram-cache.h"
+#include "sampling.h"
+#include "log.h"
+#include "llama.h"

-#include <cmath>
 #include <cstdint>
 #include <cstdio>
 #include <fstream>
 #include <string>
 #include <vector>
-#include <unordered_map>

 int main(int argc, char ** argv){
     gpt_params params;

-    if (!gpt_params_parse(argc, argv, params)) {
-        gpt_params_print_usage(argc, argv, params);
+    if (!gpt_params_parse(argc, argv, params, LLAMA_EXAMPLE_LOOKUP)) {
         return 1;
     }

+    gpt_init();
+
     // max. number of additional tokens to draft if match is found
     const int n_draft = params.n_draft;

     const bool dump_kv_cache = params.dump_kv_cache;

-#ifndef LOG_DISABLE_LOGS
-    log_set_target(log_filename_generator("lookup", "log"));
-    LOG_TEE("Log start\n");
-    log_dump_cmdline(argc, argv);
-#endif // LOG_DISABLE_LOGS
-
     // init llama.cpp
     llama_backend_init();
     llama_numa_init(params.numa);

-    llama_model * model = NULL;
-    llama_context * ctx = NULL;
-
     // load the model
-
+    llama_init_result llama_init = llama_init_from_gpt_params(params);
+
+    llama_model * model = llama_init.model;
+    llama_context * ctx = llama_init.context;

     // tokenize the prompt
     std::vector<llama_token> inp;
@@ -59,7 +55,7 @@ int main(int argc, char ** argv){
         try {
             ngram_cache_static = llama_ngram_cache_load(params.lookup_cache_static);
         } catch (std::ifstream::failure const &) {
-
+            LOG_ERR("failed to open static lookup cache: %s", params.lookup_cache_static.c_str());
             exit(1);
         }
     }
@@ -77,14 +73,14 @@ int main(int argc, char ** argv){
     const int max_tokens_list_size = max_context_size - 4;

     if ((int) inp.size() > max_tokens_list_size) {
-
+        LOG_ERR("%s: prompt too long (%d tokens, max %d)\n", __func__, (int) inp.size(), max_tokens_list_size);
         return 1;
     }

-
+    LOG("\n\n");

     for (auto id : inp) {
-
+        LOG("%s", llama_token_to_piece(ctx, id).c_str());
     }

     fflush(stderr);
@@ -106,7 +102,7 @@ int main(int argc, char ** argv){

     bool has_eos = false;

-    struct
+    struct gpt_sampler * smpl = gpt_sampler_init(model, params.sparams);

     std::vector<llama_token> draft;

@@ -125,19 +121,19 @@ int main(int argc, char ** argv){
         }

         // print current draft sequence
-
+        LOG_DBG("drafted %s\n", string_from(ctx, draft).c_str());

         int i_dft = 0;
         while (true) {
             // sample from the target model
-            llama_token id =
+            llama_token id = gpt_sampler_sample(smpl, ctx, i_dft);

-
+            gpt_sampler_accept(smpl, id, true);

             const std::string token_str = llama_token_to_piece(ctx, id);

             if (!params.use_color) {
-
+                LOG("%s", token_str.c_str());
             }

             if (llama_token_is_eog(model, id)) {
@@ -148,7 +144,7 @@ int main(int argc, char ** argv){

             // check if the target token matches the draft
             if (i_dft < (int) draft.size() && id == draft[i_dft]) {
-
+                LOG_DBG("the sampled target token matches the %dth drafted token (%d, '%s') - accepted\n", i_dft, id, token_str.c_str());
                 ++n_accept;
                 ++n_past;
                 ++i_dft;
@@ -162,19 +158,19 @@ int main(int argc, char ** argv){

                 if (params.use_color) {
                     // color accepted draft token
-
+                    LOG("\033[34m%s\033[0m", token_str.c_str());
                     fflush(stdout);
                 }
                 continue;
             }

             if (params.use_color) {
-
+                LOG("%s", token_str.c_str());
             }
             fflush(stdout);


-
+            LOG_DBG("the sampled target token (%d, '%s') did not match, or we ran out of drafted tokens\n", id, token_str.c_str());

             draft.clear();
             draft.push_back(id);
@@ -225,25 +221,26 @@ int main(int argc, char ** argv){
     llama_ngram_cache_merge(ngram_cache_dynamic, ngram_cache_context);
     llama_ngram_cache_save(ngram_cache_dynamic, params.lookup_cache_dynamic);

-
+    LOG("\n\n");

-
-
+    LOG_INF("encoded %4d tokens in %8.3f seconds, speed: %8.3f t/s\n", n_input, (t_enc_end - t_enc_start) / 1e6f, inp.size() / ((t_enc_end - t_enc_start) / 1e6f));
+    LOG_INF("decoded %4d tokens in %8.3f seconds, speed: %8.3f t/s\n", n_predict, (t_dec_end - t_dec_start) / 1e6f, n_predict / ((t_dec_end - t_dec_start) / 1e6f));

-
-
-
-
-
-
+    LOG_INF("\n");
+    LOG_INF("n_draft = %d\n", n_draft);
+    LOG_INF("n_predict = %d\n", n_predict);
+    LOG_INF("n_drafted = %d\n", n_drafted);
+    LOG_INF("t_draft_flat = %.2f ms\n", t_draft_flat_us*1e-3);
+    LOG_INF("t_draft = %.2f ms, %.2f us per token, %.2f tokens per second\n",
             t_draft_us*1e-3, 1.0f*t_draft_us/n_drafted, n_drafted/(1e-6*t_draft_us));
-
-
+    LOG_INF("n_accept = %d\n", n_accept);
+    LOG_INF("accept = %.3f%%\n", 100.0f * n_accept / n_drafted);
+
+    LOG_INF("\ntarget:\n\n");
+    gpt_perf_print(ctx, smpl);

-
-    llama_print_timings(ctx);
+    gpt_sampler_free(smpl);

-    llama_sampling_free(ctx_sampling);
     llama_batch_free(batch_tgt);

     llama_free(ctx);
@@ -251,7 +248,7 @@ int main(int argc, char ** argv){

     llama_backend_free();

-
+    LOG("\n\n");

     return 0;
 }
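
For orientation, the hunks above show the lookup example migrating from the old llama_sampling_* / LOG_TEE helpers to the common-library APIs this llama.cpp sync introduces: arg.h with gpt_params_parse taking an example id, gpt_init, the gpt_sampler_* family, and the LOG / LOG_INF / LOG_ERR / LOG_DBG macros. Below is a minimal sketch of the new-style flow, assembled only from calls that appear in the hunks plus the usual teardown calls (llama_free_model, llama_backend_free), which are assumed rather than taken from this diff; it is an illustration, not code from the package.

#include "arg.h"
#include "common.h"
#include "sampling.h"
#include "log.h"
#include "llama.h"

int main(int argc, char ** argv) {
    gpt_params params;

    // the example id (LLAMA_EXAMPLE_LOOKUP in the diff above) selects which CLI flags are accepted
    if (!gpt_params_parse(argc, argv, params, LLAMA_EXAMPLE_LOOKUP)) {
        return 1;
    }

    gpt_init(); // replaces the old LOG_DISABLE_LOGS / log_set_target setup

    llama_backend_init();
    llama_numa_init(params.numa);

    // model and context now come back together in a llama_init_result
    llama_init_result llama_init = llama_init_from_gpt_params(params);
    llama_model   * model = llama_init.model;
    llama_context * ctx   = llama_init.context;

    // a single gpt_sampler replaces the old llama_sampling_context
    struct gpt_sampler * smpl = gpt_sampler_init(model, params.sparams);

    // generation loop (prompt evaluation via llama_decode omitted), as in the diff:
    //   llama_token id = gpt_sampler_sample(smpl, ctx, i_dft);
    //   gpt_sampler_accept(smpl, id, true);
    //   LOG("%s", llama_token_to_piece(ctx, id).c_str());

    gpt_perf_print(ctx, smpl); // replaces llama_print_timings(ctx)
    gpt_sampler_free(smpl);    // replaces llama_sampling_free(ctx_sampling)

    llama_free(ctx);
    llama_free_model(model);
    llama_backend_free();

    return 0;
}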