@fugood/llama.node 0.3.7 → 0.3.8
This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their public registries.
- package/README.md +17 -2
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +2 -0
- package/lib/index.js +16 -1
- package/lib/index.ts +16 -0
- package/package.json +1 -1
- package/src/EmbeddingWorker.cpp +4 -3
- package/src/LlamaCompletionWorker.cpp +4 -2
- package/src/LlamaContext.cpp +61 -6
- package/src/LlamaContext.h +1 -0
- package/src/common.hpp +6 -11
- package/src/llama.cpp/.github/workflows/build.yml +19 -17
- package/src/llama.cpp/.github/workflows/docker.yml +77 -30
- package/src/llama.cpp/.github/workflows/editorconfig.yml +3 -1
- package/src/llama.cpp/.github/workflows/server.yml +22 -3
- package/src/llama.cpp/CMakeLists.txt +49 -24
- package/src/llama.cpp/common/arg.cpp +82 -26
- package/src/llama.cpp/common/arg.h +3 -0
- package/src/llama.cpp/common/common.cpp +192 -72
- package/src/llama.cpp/common/common.h +51 -18
- package/src/llama.cpp/common/ngram-cache.cpp +12 -12
- package/src/llama.cpp/common/ngram-cache.h +2 -2
- package/src/llama.cpp/common/sampling.cpp +11 -6
- package/src/llama.cpp/common/speculative.cpp +18 -15
- package/src/llama.cpp/docs/build.md +2 -0
- package/src/llama.cpp/examples/batched/batched.cpp +9 -7
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +3 -3
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +10 -8
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +11 -8
- package/src/llama.cpp/examples/cvector-generator/mean.hpp +1 -1
- package/src/llama.cpp/examples/cvector-generator/pca.hpp +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +8 -7
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +7 -6
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +8 -7
- package/src/llama.cpp/examples/gguf/gguf.cpp +10 -6
- package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +1 -0
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +8 -7
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +13 -10
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +13 -12
- package/src/llama.cpp/examples/infill/infill.cpp +23 -24
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +44 -13
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +11 -6
- package/src/llama.cpp/examples/llava/clip.cpp +4 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +9 -6
- package/src/llama.cpp/examples/llava/llava.cpp +2 -2
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +8 -4
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +11 -8
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +6 -7
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +4 -9
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +3 -7
- package/src/llama.cpp/examples/lookup/lookup.cpp +5 -6
- package/src/llama.cpp/examples/main/main.cpp +51 -29
- package/src/llama.cpp/examples/parallel/parallel.cpp +5 -6
- package/src/llama.cpp/examples/passkey/passkey.cpp +7 -5
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +37 -23
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +12 -14
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +8 -8
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +12 -0
- package/src/llama.cpp/examples/run/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +1351 -0
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +114 -0
- package/src/llama.cpp/examples/run/run.cpp +175 -61
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +4 -25
- package/src/llama.cpp/examples/server/CMakeLists.txt +1 -0
- package/src/llama.cpp/examples/server/httplib.h +1295 -409
- package/src/llama.cpp/examples/server/server.cpp +387 -181
- package/src/llama.cpp/examples/server/tests/requirements.txt +1 -0
- package/src/llama.cpp/examples/server/utils.hpp +170 -58
- package/src/llama.cpp/examples/simple/simple.cpp +9 -8
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +16 -12
- package/src/llama.cpp/examples/speculative/speculative.cpp +22 -23
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +8 -12
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +17 -5
- package/src/llama.cpp/examples/tts/tts.cpp +64 -23
- package/src/llama.cpp/ggml/CMakeLists.txt +5 -21
- package/src/llama.cpp/ggml/include/ggml-backend.h +2 -0
- package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -0
- package/src/llama.cpp/ggml/include/ggml.h +36 -145
- package/src/llama.cpp/ggml/include/gguf.h +202 -0
- package/src/llama.cpp/ggml/src/CMakeLists.txt +6 -3
- package/src/llama.cpp/ggml/src/ggml-alloc.c +5 -0
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +0 -1
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +79 -49
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +5 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +33 -23
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +57 -72
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +87 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +335 -66
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +10 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1090 -378
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +2 -2
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +3 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +3 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +3 -1
- package/src/llama.cpp/ggml/src/ggml-impl.h +11 -16
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +16 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +6 -6
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +154 -35
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +9 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +18 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +40 -95
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +48 -48
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +24 -24
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +238 -164
- package/src/llama.cpp/ggml/src/ggml-sycl/gla.cpp +105 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/gla.hpp +8 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +3 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +7 -5
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +74 -4
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +314 -116
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -2
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +9 -3
- package/src/llama.cpp/ggml/src/ggml.c +117 -1327
- package/src/llama.cpp/ggml/src/gguf.cpp +1329 -0
- package/src/llama.cpp/include/llama-cpp.h +6 -1
- package/src/llama.cpp/include/llama.h +138 -75
- package/src/llama.cpp/src/CMakeLists.txt +13 -1
- package/src/llama.cpp/src/llama-adapter.cpp +347 -0
- package/src/llama.cpp/src/llama-adapter.h +74 -0
- package/src/llama.cpp/src/llama-arch.cpp +1487 -0
- package/src/llama.cpp/src/llama-arch.h +400 -0
- package/src/llama.cpp/src/llama-batch.cpp +368 -0
- package/src/llama.cpp/src/llama-batch.h +88 -0
- package/src/llama.cpp/src/llama-chat.cpp +578 -0
- package/src/llama.cpp/src/llama-chat.h +52 -0
- package/src/llama.cpp/src/llama-context.cpp +1775 -0
- package/src/llama.cpp/src/llama-context.h +128 -0
- package/src/llama.cpp/src/llama-cparams.cpp +1 -0
- package/src/llama.cpp/src/llama-cparams.h +37 -0
- package/src/llama.cpp/src/llama-grammar.cpp +5 -4
- package/src/llama.cpp/src/llama-grammar.h +3 -1
- package/src/llama.cpp/src/llama-hparams.cpp +71 -0
- package/src/llama.cpp/src/llama-hparams.h +139 -0
- package/src/llama.cpp/src/llama-impl.cpp +167 -0
- package/src/llama.cpp/src/llama-impl.h +16 -136
- package/src/llama.cpp/src/llama-kv-cache.cpp +718 -0
- package/src/llama.cpp/src/llama-kv-cache.h +218 -0
- package/src/llama.cpp/src/llama-mmap.cpp +589 -0
- package/src/llama.cpp/src/llama-mmap.h +67 -0
- package/src/llama.cpp/src/llama-model-loader.cpp +1124 -0
- package/src/llama.cpp/src/llama-model-loader.h +167 -0
- package/src/llama.cpp/src/llama-model.cpp +3953 -0
- package/src/llama.cpp/src/llama-model.h +370 -0
- package/src/llama.cpp/src/llama-quant.cpp +934 -0
- package/src/llama.cpp/src/llama-quant.h +1 -0
- package/src/llama.cpp/src/llama-sampling.cpp +147 -32
- package/src/llama.cpp/src/llama-sampling.h +3 -19
- package/src/llama.cpp/src/llama-vocab.cpp +1832 -575
- package/src/llama.cpp/src/llama-vocab.h +97 -142
- package/src/llama.cpp/src/llama.cpp +7160 -20314
- package/src/llama.cpp/src/unicode.cpp +8 -3
- package/src/llama.cpp/tests/CMakeLists.txt +2 -0
- package/src/llama.cpp/tests/test-autorelease.cpp +3 -3
- package/src/llama.cpp/tests/test-backend-ops.cpp +370 -59
- package/src/llama.cpp/tests/test-chat-template.cpp +162 -125
- package/src/llama.cpp/tests/test-gguf.cpp +222 -187
- package/src/llama.cpp/tests/test-model-load-cancel.cpp +1 -1
- package/src/llama.cpp/tests/test-sampling.cpp +0 -1
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +4 -4
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +9 -7
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +8 -6
package/src/llama.cpp/examples/save-load-state/save-load-state.cpp (+4 -25):

@@ -30,8 +30,8 @@ int main(int argc, char ** argv) {
     // init
     common_init_result llama_init = common_init_from_params(params);
 
-    llama_model * model = llama_init.model;
-    llama_context * ctx = llama_init.context;
+    llama_model * model = llama_init.model.get();
+    llama_context * ctx = llama_init.context.get();
 
     if (model == nullptr || ctx == nullptr) {
         fprintf(stderr, "%s : failed to init\n", __func__);
@@ -89,8 +89,6 @@ int main(int argc, char ** argv) {
         if (llama_decode(ctx, batch)) {
             fprintf(stderr, "\n%s : failed to evaluate\n", __func__);
             llama_batch_free(batch);
-            llama_free(ctx);
-            llama_free_model(model);
             return 1;
         }
         n_past += 1;
@@ -98,11 +96,8 @@ int main(int argc, char ** argv) {
 
     printf("\n\n");
 
-    // free old context
-    llama_free(ctx);
-
     // make new context
-    llama_context * ctx2 = llama_new_context_with_model(model, common_context_params_to_llama(params));
+    llama_context * ctx2 = llama_init_from_model(model, common_context_params_to_llama(params));
 
     llama_sampler * smpl2 = llama_sampler_chain_init(sparams);
 
@@ -123,8 +118,6 @@ int main(int argc, char ** argv) {
 
     if (read != llama_state_set_data(ctx2, state_mem.data(), state_mem.size())) {
         fprintf(stderr, "\n%s : failed to read state\n", __func__);
-        llama_free(ctx2);
-        llama_free_model(model);
         return 1;
     }
 
@@ -148,8 +141,6 @@ int main(int argc, char ** argv) {
         if (llama_decode(ctx2, batch)) {
             fprintf(stderr, "\n%s : failed to evaluate\n", __func__);
             llama_batch_free(batch);
-            llama_free(ctx2);
-            llama_free_model(model);
             return 1;
         }
         n_past += 1;
@@ -157,15 +148,13 @@ int main(int argc, char ** argv) {
 
     printf("\n\n");
 
-    llama_free(ctx2);
-
     if (result0 != result1) {
         fprintf(stderr, "\n%s : error : the 2 generations are different\n", __func__);
         return 1;
     }
 
     // make new context
-    llama_context * ctx3 = llama_new_context_with_model(model, common_context_params_to_llama(params));
+    llama_context * ctx3 = llama_init_from_model(model, common_context_params_to_llama(params));
 
     llama_sampler * smpl3 = llama_sampler_chain_init(sparams);
 
@@ -186,8 +175,6 @@ int main(int argc, char ** argv) {
 
     if (read != llama_state_set_data(ctx3, state_mem.data(), state_mem.size())) {
         fprintf(stderr, "\n%s : failed to read state\n", __func__);
-        llama_free(ctx3);
-        llama_free_model(model);
         return 1;
     }
 
@@ -204,8 +191,6 @@ int main(int argc, char ** argv) {
     const size_t ncopy = llama_state_seq_get_data(ctx3, seq_store.data(), seq_store.size(), 0);
     if (ncopy != seq_store.size()) {
         fprintf(stderr, "\n%s : seq copy data length %zd does not match expected length %zd\n", __func__, ncopy, seq_store.size());
-        llama_free(ctx3);
-        llama_free_model(model);
         return 1;
     }
     fprintf(stderr, "%s : seq 0 copied, %zd bytes\n", __func__, ncopy);
@@ -218,8 +203,6 @@ int main(int argc, char ** argv) {
     const size_t nset = llama_state_seq_set_data(ctx3, seq_store.data(), seq_store.size(), 1);
     if (nset != seq_store.size()) {
         fprintf(stderr, "\n%s : seq set data length %zd does not match expected length %zd\n", __func__, nset, seq_store.size());
-        llama_free(ctx3);
-        llama_free_model(model);
         return 1;
     }
     fprintf(stderr, "%s : seq 1 restored, %zd bytes\n", __func__, nset);
@@ -239,8 +222,6 @@ int main(int argc, char ** argv) {
        if (llama_decode(ctx3, batch)) {
            fprintf(stderr, "\n%s : failed to evaluate\n", __func__);
            llama_batch_free(batch);
-            llama_free(ctx3);
-            llama_free_model(model);
             return 1;
         }
         n_past += 1;
@@ -253,8 +234,6 @@ int main(int argc, char ** argv) {
     llama_sampler_free(smpl3);
 
     llama_batch_free(batch);
-    llama_free(ctx3);
-    llama_free_model(model);
 
     if (result0 != result2) {
         fprintf(stderr, "\n%s : error : the seq restore generation is different\n", __func__);
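Why all those `llama_free`/`llama_free_model` calls could simply be deleted: judging by the hunks above together with the `include/llama-cpp.h` entry in the file list (+6 -1), `common_init_result` now owns the model and context through `std::unique_ptr` wrappers, which is also why the first hunk borrows the raw handles with `.get()`. A minimal sketch of that ownership shape, assuming the alias and deleter names mirror `llama-cpp.h`:

```cpp
// Sketch only, not the exact llama.cpp source: smart-pointer ownership that
// makes the per-error-path llama_free()/llama_free_model() calls unnecessary.
#include <memory>

struct llama_model;    // opaque handles declared in llama.h
struct llama_context;
extern "C" void llama_free_model(llama_model * model);
extern "C" void llama_free(llama_context * ctx);

struct llama_model_deleter {
    void operator()(llama_model * model) const { llama_free_model(model); }
};
struct llama_context_deleter {
    void operator()(llama_context * ctx) const { llama_free(ctx); }
};

using llama_model_ptr   = std::unique_ptr<llama_model,   llama_model_deleter>;
using llama_context_ptr = std::unique_ptr<llama_context, llama_context_deleter>;

// common_init_result owns both handles; when it goes out of scope in main(),
// the deleters run exactly once, no matter which early `return 1;` fired.
struct common_init_result {
    llama_model_ptr   model;
    llama_context_ptr context;
};
```

Under this shape, the example's early returns leak nothing because `llama_init` itself is the owner; the raw `model` and `ctx` pointers are non-owning views.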
package/src/llama.cpp/examples/server/CMakeLists.txt (+1 -0):

@@ -34,6 +34,7 @@ endforeach()
 add_executable(${TARGET} ${TARGET_SRCS})
 install(TARGETS ${TARGET} RUNTIME)
 
+target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR})
 target_link_libraries(${TARGET} PRIVATE common ${CMAKE_THREAD_LIBS_INIT})
 
 if (LLAMA_SERVER_SSL)
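This one-line addition is a build-system convenience rather than a behavior change: `target_include_directories(... PRIVATE ${CMAKE_SOURCE_DIR})` puts the source root on the header search path when compiling the server target only. A hedged guess at what it enables (the include path below is illustrative, not taken from this diff):

```cpp
// In a server translation unit, in-tree headers can now be resolved
// relative to the source root rather than via paths exported by the
// libraries the target links against:
#include "common/common.h"  // hypothetical example of a root-relative include
```

Because the directory is marked PRIVATE, it affects only the compilation of the server target itself and is not propagated to anything else in the build.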