@fugood/llama.node 0.0.1-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +85 -0
- package/README.md +56 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/lib/binding.js +13 -0
- package/lib/binding.ts +57 -0
- package/lib/index.js +24 -0
- package/lib/index.ts +13 -0
- package/package.json +65 -0
- package/src/addons.cpp +506 -0
- package/src/llama.cpp/CMakeLists.txt +1320 -0
- package/src/llama.cpp/build.zig +172 -0
- package/src/llama.cpp/cmake/FindSIMD.cmake +100 -0
- package/src/llama.cpp/common/CMakeLists.txt +87 -0
- package/src/llama.cpp/common/base64.hpp +392 -0
- package/src/llama.cpp/common/common.cpp +2949 -0
- package/src/llama.cpp/common/common.h +324 -0
- package/src/llama.cpp/common/console.cpp +501 -0
- package/src/llama.cpp/common/console.h +19 -0
- package/src/llama.cpp/common/grammar-parser.cpp +440 -0
- package/src/llama.cpp/common/grammar-parser.h +29 -0
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +764 -0
- package/src/llama.cpp/common/json-schema-to-grammar.h +4 -0
- package/src/llama.cpp/common/json.hpp +24766 -0
- package/src/llama.cpp/common/log.h +724 -0
- package/src/llama.cpp/common/ngram-cache.cpp +282 -0
- package/src/llama.cpp/common/ngram-cache.h +94 -0
- package/src/llama.cpp/common/sampling.cpp +353 -0
- package/src/llama.cpp/common/sampling.h +147 -0
- package/src/llama.cpp/common/stb_image.h +8396 -0
- package/src/llama.cpp/common/train.cpp +1513 -0
- package/src/llama.cpp/common/train.h +233 -0
- package/src/llama.cpp/examples/CMakeLists.txt +52 -0
- package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +1640 -0
- package/src/llama.cpp/examples/batched/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/batched/batched.cpp +262 -0
- package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +261 -0
- package/src/llama.cpp/examples/beam-search/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/beam-search/beam-search.cpp +188 -0
- package/src/llama.cpp/examples/benchmark/CMakeLists.txt +6 -0
- package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +275 -0
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +936 -0
- package/src/llama.cpp/examples/embedding/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/embedding/embedding.cpp +211 -0
- package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +9 -0
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +195 -0
- package/src/llama.cpp/examples/export-lora/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +462 -0
- package/src/llama.cpp/examples/finetune/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/finetune/finetune.cpp +1861 -0
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +132 -0
- package/src/llama.cpp/examples/gguf/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/gguf/gguf.cpp +256 -0
- package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +553 -0
- package/src/llama.cpp/examples/gritlm/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +215 -0
- package/src/llama.cpp/examples/imatrix/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +655 -0
- package/src/llama.cpp/examples/infill/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/infill/infill.cpp +767 -0
- package/src/llama.cpp/examples/jeopardy/questions.txt +100 -0
- package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +1286 -0
- package/src/llama.cpp/examples/llama.android/app/src/main/cpp/CMakeLists.txt +50 -0
- package/src/llama.cpp/examples/llama.android/app/src/main/cpp/llama-android.cpp +443 -0
- package/src/llama.cpp/examples/llava/CMakeLists.txt +37 -0
- package/src/llama.cpp/examples/llava/clip.cpp +2027 -0
- package/src/llama.cpp/examples/llava/clip.h +85 -0
- package/src/llama.cpp/examples/llava/llava-cli.cpp +309 -0
- package/src/llama.cpp/examples/llava/llava.cpp +426 -0
- package/src/llama.cpp/examples/llava/llava.h +50 -0
- package/src/llama.cpp/examples/llava/requirements.txt +3 -0
- package/src/llama.cpp/examples/lookahead/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +485 -0
- package/src/llama.cpp/examples/lookup/CMakeLists.txt +23 -0
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +41 -0
- package/src/llama.cpp/examples/lookup/lookup-merge.cpp +47 -0
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +160 -0
- package/src/llama.cpp/examples/lookup/lookup.cpp +258 -0
- package/src/llama.cpp/examples/main/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/main/main.cpp +957 -0
- package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +33 -0
- package/src/llama.cpp/examples/parallel/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/parallel/parallel.cpp +427 -0
- package/src/llama.cpp/examples/passkey/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/passkey/passkey.cpp +302 -0
- package/src/llama.cpp/examples/perplexity/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +1943 -0
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +6 -0
- package/src/llama.cpp/examples/quantize/quantize.cpp +423 -0
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +6 -0
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +424 -0
- package/src/llama.cpp/examples/retrieval/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +350 -0
- package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +246 -0
- package/src/llama.cpp/examples/server/CMakeLists.txt +40 -0
- package/src/llama.cpp/examples/server/bench/requirements.txt +2 -0
- package/src/llama.cpp/examples/server/httplib.h +9465 -0
- package/src/llama.cpp/examples/server/server.cpp +3826 -0
- package/src/llama.cpp/examples/server/tests/requirements.txt +6 -0
- package/src/llama.cpp/examples/server/utils.hpp +653 -0
- package/src/llama.cpp/examples/simple/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/simple/simple.cpp +183 -0
- package/src/llama.cpp/examples/speculative/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/speculative/speculative.cpp +614 -0
- package/src/llama.cpp/examples/sycl/CMakeLists.txt +9 -0
- package/src/llama.cpp/examples/sycl/ls-sycl-device.cpp +13 -0
- package/src/llama.cpp/examples/tokenize/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +42 -0
- package/src/llama.cpp/examples/train-text-from-scratch/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/train-text-from-scratch/train-text-from-scratch.cpp +1252 -0
- package/src/llama.cpp/ggml-alloc.c +985 -0
- package/src/llama.cpp/ggml-alloc.h +76 -0
- package/src/llama.cpp/ggml-backend-impl.h +141 -0
- package/src/llama.cpp/ggml-backend.c +2099 -0
- package/src/llama.cpp/ggml-backend.h +233 -0
- package/src/llama.cpp/ggml-common.h +1853 -0
- package/src/llama.cpp/ggml-cuda.h +43 -0
- package/src/llama.cpp/ggml-impl.h +265 -0
- package/src/llama.cpp/ggml-kompute.cpp +2006 -0
- package/src/llama.cpp/ggml-kompute.h +46 -0
- package/src/llama.cpp/ggml-metal.h +66 -0
- package/src/llama.cpp/ggml-mpi.c +216 -0
- package/src/llama.cpp/ggml-mpi.h +39 -0
- package/src/llama.cpp/ggml-opencl.cpp +2301 -0
- package/src/llama.cpp/ggml-opencl.h +36 -0
- package/src/llama.cpp/ggml-quants.c +12678 -0
- package/src/llama.cpp/ggml-quants.h +133 -0
- package/src/llama.cpp/ggml-sycl.cpp +17882 -0
- package/src/llama.cpp/ggml-sycl.h +49 -0
- package/src/llama.cpp/ggml-vulkan-shaders.hpp +69849 -0
- package/src/llama.cpp/ggml-vulkan.cpp +6442 -0
- package/src/llama.cpp/ggml-vulkan.h +29 -0
- package/src/llama.cpp/ggml.c +21819 -0
- package/src/llama.cpp/ggml.h +2403 -0
- package/src/llama.cpp/llama.cpp +17468 -0
- package/src/llama.cpp/llama.h +1117 -0
- package/src/llama.cpp/pocs/CMakeLists.txt +12 -0
- package/src/llama.cpp/pocs/vdot/CMakeLists.txt +9 -0
- package/src/llama.cpp/pocs/vdot/q8dot.cpp +172 -0
- package/src/llama.cpp/pocs/vdot/vdot.cpp +310 -0
- package/src/llama.cpp/prompts/LLM-questions.txt +49 -0
- package/src/llama.cpp/prompts/alpaca.txt +1 -0
- package/src/llama.cpp/prompts/assistant.txt +31 -0
- package/src/llama.cpp/prompts/chat-with-baichuan.txt +4 -0
- package/src/llama.cpp/prompts/chat-with-bob.txt +7 -0
- package/src/llama.cpp/prompts/chat-with-qwen.txt +1 -0
- package/src/llama.cpp/prompts/chat-with-vicuna-v0.txt +7 -0
- package/src/llama.cpp/prompts/chat-with-vicuna-v1.txt +7 -0
- package/src/llama.cpp/prompts/chat.txt +28 -0
- package/src/llama.cpp/prompts/dan-modified.txt +1 -0
- package/src/llama.cpp/prompts/dan.txt +1 -0
- package/src/llama.cpp/prompts/mnemonics.txt +93 -0
- package/src/llama.cpp/prompts/parallel-questions.txt +43 -0
- package/src/llama.cpp/prompts/reason-act.txt +18 -0
- package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf.txt +3 -0
- package/src/llama.cpp/requirements/requirements-convert-llama-ggml-to-gguf.txt +1 -0
- package/src/llama.cpp/requirements/requirements-convert-lora-to-ggml.txt +2 -0
- package/src/llama.cpp/requirements/requirements-convert-persimmon-to-gguf.txt +2 -0
- package/src/llama.cpp/requirements/requirements-convert.txt +5 -0
- package/src/llama.cpp/requirements.txt +12 -0
- package/src/llama.cpp/scripts/gen-build-info-cpp.cmake +24 -0
- package/src/llama.cpp/scripts/xxd.cmake +16 -0
- package/src/llama.cpp/sgemm.cpp +999 -0
- package/src/llama.cpp/sgemm.h +12 -0
- package/src/llama.cpp/tests/CMakeLists.txt +78 -0
- package/src/llama.cpp/tests/get-model.cpp +21 -0
- package/src/llama.cpp/tests/get-model.h +2 -0
- package/src/llama.cpp/tests/test-autorelease.cpp +24 -0
- package/src/llama.cpp/tests/test-backend-ops.cpp +2266 -0
- package/src/llama.cpp/tests/test-c.c +7 -0
- package/src/llama.cpp/tests/test-chat-template.cpp +107 -0
- package/src/llama.cpp/tests/test-double-float.cpp +57 -0
- package/src/llama.cpp/tests/test-grad0.cpp +1606 -0
- package/src/llama.cpp/tests/test-grammar-integration.cpp +243 -0
- package/src/llama.cpp/tests/test-grammar-parser.cpp +250 -0
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +899 -0
- package/src/llama.cpp/tests/test-llama-grammar.cpp +402 -0
- package/src/llama.cpp/tests/test-model-load-cancel.cpp +27 -0
- package/src/llama.cpp/tests/test-opt.cpp +181 -0
- package/src/llama.cpp/tests/test-quantize-fns.cpp +185 -0
- package/src/llama.cpp/tests/test-quantize-perf.cpp +363 -0
- package/src/llama.cpp/tests/test-rope.cpp +221 -0
- package/src/llama.cpp/tests/test-sampling.cpp +301 -0
- package/src/llama.cpp/tests/test-tokenizer-0-falcon.cpp +187 -0
- package/src/llama.cpp/tests/test-tokenizer-0-llama.cpp +190 -0
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +123 -0
- package/src/llama.cpp/tests/test-tokenizer-1-llama.cpp +111 -0
- package/src/llama.cpp/unicode-data.cpp +1651 -0
- package/src/llama.cpp/unicode-data.h +16 -0
- package/src/llama.cpp/unicode.cpp +277 -0
- package/src/llama.cpp/unicode.h +28 -0
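The prebuilt binaries listed above are laid out as bin/<platform>/<arch>/llama-node.node. The published lib/binding.js (13 lines) is not included in this diff, so the following TypeScript sketch of a loader for that layout is purely illustrative; the export shape and error handling are assumptions, not the package's actual code.

// binding-loader sketch (illustrative; the real lib/binding.js is not shown in this diff)
import * as path from 'path';

// Resolve the prebuilt addon from bin/<platform>/<arch>/llama-node.node
const binaryPath = path.join(
  __dirname, '..', 'bin', process.platform, process.arch, 'llama-node.node',
);

// eslint-disable-next-line @typescript-eslint/no-var-requires
export const { LlamaContext } = require(binaryPath);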
package/src/addons.cpp
ADDED
@@ -0,0 +1,506 @@
#include "common/common.h"
#include "llama.h"
#include <memory>
#include <mutex>
#include <napi.h>
#include <string>
#include <thread>
#include <tuple>
#include <vector>

typedef std::unique_ptr<llama_model, decltype(&llama_free_model)> LlamaCppModel;
typedef std::unique_ptr<llama_context, decltype(&llama_free)> LlamaCppContext;
typedef std::unique_ptr<llama_sampling_context, decltype(&llama_sampling_free)>
    LlamaCppSampling;
typedef std::unique_ptr<llama_batch, decltype(&llama_batch_free)> LlamaCppBatch;

size_t common_part(const std::vector<llama_token> &a,
                   const std::vector<llama_token> &b) {
  size_t i = 0;
  while (i < a.size() && i < b.size() && a[i] == b[i]) {
    i++;
  }
  return i;
}

class LlamaCompletionWorker;

class LlamaContext : public Napi::ObjectWrap<LlamaContext> {
public:
  // construct({ model, embedding, n_ctx, n_batch, n_threads, n_gpu_layers,
  // use_mlock, use_mmap }): LlamaContext throws error
  LlamaContext(const Napi::CallbackInfo &info)
      : Napi::ObjectWrap<LlamaContext>(info) {
    Napi::Env env = info.Env();
    if (info.Length() < 1 || !info[0].IsObject()) {
      Napi::TypeError::New(env, "Object expected").ThrowAsJavaScriptException();
    }
    auto options = info[0].As<Napi::Object>();

    if (options.Has("model")) {
      params.model = options.Get("model").ToString();
    }
    if (options.Has("embedding")) {
      params.embedding = options.Get("embedding").ToBoolean();
    }
    if (options.Has("n_ctx")) {
      params.n_ctx = options.Get("n_ctx").ToNumber();
    }
    if (options.Has("n_batch")) {
      params.n_batch = options.Get("n_batch").ToNumber();
    }
    if (options.Has("n_threads")) {
      params.n_threads = options.Get("n_threads").ToNumber();
    }
    if (options.Has("n_gpu_layers")) {
      params.n_gpu_layers = options.Get("n_gpu_layers").ToNumber();
    }
    if (options.Has("use_mlock")) {
      params.use_mlock = options.Get("use_mlock").ToBoolean();
    }
    if (options.Has("use_mmap")) {
      params.use_mmap = options.Get("use_mmap").ToBoolean();
    }
    if (options.Has("numa")) {
      int numa = options.Get("numa").ToNumber();
      params.numa = static_cast<ggml_numa_strategy>(numa);
    }
    if (options.Has("seed")) {
      params.seed = options.Get("seed").ToNumber();
    }

    llama_backend_init();
    llama_numa_init(params.numa);

    auto tuple = llama_init_from_gpt_params(params);
    model.reset(std::get<0>(tuple));
    ctx.reset(std::get<1>(tuple));

    if (model == nullptr || ctx == nullptr) {
      Napi::TypeError::New(env, "Failed to load model")
          .ThrowAsJavaScriptException();
    }
  }

  static void Export(Napi::Env env, Napi::Object &exports) {
    Napi::Function func = DefineClass(
        env, "LlamaContext",
        {InstanceMethod<&LlamaContext::GetSystemInfo>(
             "getSystemInfo",
             static_cast<napi_property_attributes>(napi_enumerable)),
         InstanceMethod<&LlamaContext::Completion>(
             "completion",
             static_cast<napi_property_attributes>(napi_enumerable)),
         InstanceMethod<&LlamaContext::StopCompletion>(
             "stopCompletion",
             static_cast<napi_property_attributes>(napi_enumerable)),
         InstanceMethod<&LlamaContext::SaveSession>(
             "saveSession",
             static_cast<napi_property_attributes>(napi_enumerable)),
         InstanceMethod<&LlamaContext::LoadSession>(
             "loadSession",
             static_cast<napi_property_attributes>(napi_enumerable))});
    Napi::FunctionReference *constructor = new Napi::FunctionReference();
    *constructor = Napi::Persistent(func);
#if NAPI_VERSION > 5
    env.SetInstanceData(constructor);
#endif
    exports.Set("LlamaContext", func);
  }

  llama_context *getContext() { return ctx.get(); }
  llama_model *getModel() { return model.get(); }

  std::vector<llama_token> *getTokens() { return tokens.get(); }

  const gpt_params &getParams() const { return params; }

  void ensureTokens() {
    if (tokens == nullptr) {
      tokens = std::make_unique<std::vector<llama_token>>();
    }
  }

  void setTokens(std::vector<llama_token> tokens) {
    this->tokens.reset(new std::vector<llama_token>(std::move(tokens)));
  }

  std::mutex &getMutex() { return mutex; }

private:
  Napi::Value GetSystemInfo(const Napi::CallbackInfo &info);
  Napi::Value Completion(const Napi::CallbackInfo &info);
  void StopCompletion(const Napi::CallbackInfo &info);
  Napi::Value SaveSession(const Napi::CallbackInfo &info);
  Napi::Value LoadSession(const Napi::CallbackInfo &info);

  gpt_params params;
  LlamaCppModel model{nullptr, llama_free_model};
  LlamaCppContext ctx{nullptr, llama_free};
  std::unique_ptr<std::vector<llama_token>> tokens;
  std::mutex mutex;
  LlamaCompletionWorker *compl_worker = nullptr;
};

class LlamaCompletionWorker : public Napi::AsyncWorker,
                              public Napi::Promise::Deferred {
  LlamaContext *_ctx;
  gpt_params _params;
  std::vector<std::string> _stop_words;
  std::string generated_text = "";
  Napi::ThreadSafeFunction _tsfn;
  bool _has_callback = false;
  bool _stop = false;
  size_t tokens_predicted = 0;
  size_t tokens_evaluated = 0;
  bool truncated = false;

public:
  LlamaCompletionWorker(const Napi::CallbackInfo &info, LlamaContext *ctx,
                        Napi::Function callback, gpt_params params,
                        std::vector<std::string> stop_words = {})
      : AsyncWorker(info.Env()), Deferred(info.Env()), _ctx(ctx),
        _params(params), _stop_words(stop_words) {
    _ctx->Ref();
    if (!callback.IsEmpty()) {
      _tsfn = Napi::ThreadSafeFunction::New(info.Env(), callback,
                                            "LlamaCompletionCallback", 0, 1);
      _has_callback = true;
    }
  }

  ~LlamaCompletionWorker() {
    _ctx->Unref();
    if (_has_callback) {
      _tsfn.Abort();
      _tsfn.Release();
    }
  }

  void Stop() { _stop = true; }

protected:
  size_t findStoppingStrings(const std::string &text,
                             const size_t last_token_size) {
    size_t stop_pos = std::string::npos;

    for (const std::string &word : _stop_words) {
      size_t pos;

      const size_t tmp = word.size() + last_token_size;
      const size_t from_pos = text.size() > tmp ? text.size() - tmp : 0;

      pos = text.find(word, from_pos);

      if (pos != std::string::npos &&
          (stop_pos == std::string::npos || pos < stop_pos)) {
        stop_pos = pos;
      }
    }

    return stop_pos;
  }

  void Execute() {
    _ctx->getMutex().lock();
    _ctx->ensureTokens();
    const auto t_main_start = ggml_time_us();
    const size_t n_ctx = _params.n_ctx;
    auto n_keep = _params.n_keep;
    auto n_predict = _params.n_predict;
    size_t n_cur = 0;
    size_t n_input = 0;
    const bool add_bos = llama_should_add_bos_token(_ctx->getModel());
    auto *ctx = _ctx->getContext();

    llama_set_rng_seed(ctx, _params.seed);

    LlamaCppSampling sampling{llama_sampling_init(_params.sparams),
                              llama_sampling_free};

    std::vector<llama_token> prompt_tokens =
        ::llama_tokenize(ctx, _params.prompt, add_bos);
    n_input = prompt_tokens.size();
    if (_ctx->getTokens() != nullptr) {
      n_cur = common_part(*_ctx->getTokens(), prompt_tokens);
      if (n_cur == n_input) {
        --n_cur;
      }
      n_input -= n_cur;
      llama_kv_cache_seq_rm(ctx, 0, n_cur, -1);
    }
    _ctx->setTokens(std::move(prompt_tokens));

    const int max_len = _params.n_predict < 0 ? 0 : _params.n_predict;

    for (int i = 0; i < max_len || _stop; i++) {
      auto *embd = _ctx->getTokens();
      // check if we need to remove some tokens
      if (embd->size() >= n_ctx) {
        const int n_left = n_cur - n_keep - 1;
        const int n_discard = n_left / 2;

        llama_kv_cache_seq_rm(ctx, 0, n_keep + 1, n_keep + n_discard + 1);
        llama_kv_cache_seq_add(ctx, 0, n_keep + 1 + n_discard, n_cur,
                               -n_discard);

        for (size_t i = n_keep + 1 + n_discard; i < embd->size(); i++) {
          (*embd)[i - n_discard] = (*embd)[i];
        }
        embd->resize(embd->size() - n_discard);

        n_cur -= n_discard;
        truncated = true;
      }
      int ret = llama_decode(
          ctx, llama_batch_get_one(embd->data() + n_cur, n_input, n_cur, 0));
      if (ret < 0) {
        SetError("Failed to decode token, code: " + std::to_string(ret));
        break;
      }
      // sample the next token
      const llama_token new_token_id =
          llama_sampling_sample(sampling.get(), ctx, nullptr);
      // prepare the next batch
      embd->push_back(new_token_id);
      auto token = llama_token_to_piece(ctx, new_token_id);
      generated_text += token;
      n_cur += n_input;
      tokens_evaluated += n_input;
      tokens_predicted += 1;
      n_input = 1;
      if (_has_callback) {
        // _cb.Call({ Napi::String::New(AsyncWorker::Env(), token) });
        const char *c_token = strdup(token.c_str());
        _tsfn.BlockingCall(c_token, [](Napi::Env env, Napi::Function jsCallback,
                                       const char *value) {
          auto obj = Napi::Object::New(env);
          obj.Set("token", Napi::String::New(env, value));
          jsCallback.Call({obj});
        });
      }
      // is it an end of generation?
      if (llama_token_is_eog(_ctx->getModel(), new_token_id)) {
        break;
      }
      // check for stop words
      if (!_stop_words.empty()) {
        const size_t stop_pos =
            findStoppingStrings(generated_text, token.size());
        if (stop_pos != std::string::npos) {
          break;
        }
      }
    }
    const auto t_main_end = ggml_time_us();
    _ctx->getMutex().unlock();
  }

  void OnOK() {
    auto result = Napi::Object::New(Napi::AsyncWorker::Env());
    result.Set("tokens_evaluated",
               Napi::Number::New(Napi::AsyncWorker::Env(), tokens_evaluated));
    result.Set("tokens_predicted",
               Napi::Number::New(Napi::AsyncWorker::Env(), tokens_predicted));
    result.Set("truncated",
               Napi::Boolean::New(Napi::AsyncWorker::Env(), truncated));
    result.Set("text",
               Napi::String::New(Napi::AsyncWorker::Env(), generated_text));
    Napi::Promise::Deferred::Resolve(result);
  }

  void OnError(const Napi::Error &err) {
    Napi::Promise::Deferred::Reject(err.Value());
  }
};

class SaveSessionWorker : public Napi::AsyncWorker,
                          public Napi::Promise::Deferred {
  std::string _path;
  LlamaContext *_ctx;

public:
  SaveSessionWorker(const Napi::CallbackInfo &info, LlamaContext *ctx)
      : AsyncWorker(info.Env()), Deferred(info.Env()),
        _path(info[0].ToString()), _ctx(ctx) {
    _ctx->Ref();
  }

protected:
  void Execute() {
    _ctx->getMutex().lock();
    if (_ctx->getTokens() == nullptr) {
      SetError("Failed to save session");
      return;
    }
    if (!llama_state_save_file(_ctx->getContext(), _path.c_str(),
                               _ctx->getTokens()->data(),
                               _ctx->getTokens()->size())) {
      SetError("Failed to save session");
    }
    _ctx->getMutex().unlock();
  }

  void OnOK() { Resolve(AsyncWorker::Env().Undefined()); }

  void OnError(const Napi::Error &err) { Reject(err.Value()); }
};

class LoadSessionWorker : public Napi::AsyncWorker,
                          public Napi::Promise::Deferred {
  std::string _path;
  LlamaContext *_ctx;
  size_t count = 0;

public:
  LoadSessionWorker(const Napi::CallbackInfo &info, LlamaContext *ctx)
      : AsyncWorker(info.Env()), Deferred(info.Env()),
        _path(info[0].ToString()), _ctx(ctx) {
    _ctx->Ref();
  }

protected:
  void Execute() {
    _ctx->getMutex().lock();
    _ctx->ensureTokens();
    // reserve the maximum number of tokens for capacity
    _ctx->getTokens()->reserve(_ctx->getParams().n_ctx);
    if (!llama_state_load_file(_ctx->getContext(), _path.c_str(),
                               _ctx->getTokens()->data(),
                               _ctx->getTokens()->capacity(), &count)) {
      SetError("Failed to load session");
    }
    _ctx->getMutex().unlock();
  }

  void OnOK() { Resolve(AsyncWorker::Env().Undefined()); }

  void OnError(const Napi::Error &err) { Reject(err.Value()); }
};

// getSystemInfo(): string
Napi::Value LlamaContext::GetSystemInfo(const Napi::CallbackInfo &info) {
  return Napi::String::New(info.Env(), get_system_info(params).c_str());
}

// completion(options: LlamaCompletionOptions, onToken?: (token: string) =>
// void): Promise<LlamaCompletionResult>
Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() < 1 || !info[0].IsObject()) {
    Napi::TypeError::New(env, "Object expected").ThrowAsJavaScriptException();
  }
  if (info.Length() >= 2 && !info[1].IsFunction()) {
    Napi::TypeError::New(env, "Function expected").ThrowAsJavaScriptException();
  }
  auto options = info[0].As<Napi::Object>();

  gpt_params params;
  if (options.Has("prompt")) {
    params.prompt = options.Get("prompt").ToString();
  } else {
    Napi::TypeError::New(env, "Prompt is required")
        .ThrowAsJavaScriptException();
  }
  params.n_predict =
      options.Has("n_predict") ? options.Get("n_predict").ToNumber() : -1;
  params.sparams.temp = options.Has("temperature")
                            ? options.Get("temperature").ToNumber()
                            : 0.80f;
  params.sparams.top_k =
      options.Has("top_k") ? options.Get("top_k").ToNumber() : 40;
  params.sparams.top_p =
      options.Has("top_p") ? options.Get("top_p").ToNumber() : 0.95f;
  params.sparams.min_p =
      options.Has("min_p") ? options.Get("min_p").ToNumber() : 0.05f;
  params.sparams.tfs_z =
      options.Has("tfs_z") ? options.Get("tfs_z").ToNumber() : 1.00f;
  params.sparams.mirostat =
      options.Has("mirostat") ? options.Get("mirostat").ToNumber() : 0;
  params.sparams.mirostat_tau = options.Has("mirostat_tau")
                                    ? options.Get("mirostat_tau").ToNumber()
                                    : 5.00f;
  params.sparams.mirostat_eta = options.Has("mirostat_eta")
                                    ? options.Get("mirostat_eta").ToNumber()
                                    : 0.10f;
  params.sparams.penalty_last_n = options.Has("penalty_last_n")
                                      ? options.Get("penalty_last_n").ToNumber()
                                      : 64;
  params.sparams.penalty_repeat = options.Has("penalty_repeat")
                                      ? options.Get("penalty_repeat").ToNumber()
                                      : 1.00f;
  params.sparams.penalty_freq = options.Has("penalty_freq")
                                    ? options.Get("penalty_freq").ToNumber()
                                    : 0.00f;
  params.sparams.penalty_present =
      options.Has("penalty_present") ? options.Get("penalty_present").ToNumber()
                                     : 0.00f;
  params.sparams.penalize_nl = options.Has("penalize_nl")
                                   ? options.Get("penalize_nl").ToBoolean()
                                   : false;
  params.sparams.typical_p =
      options.Has("typical_p") ? options.Get("typical_p").ToNumber() : 1.00f;
  params.ignore_eos =
      options.Has("ignore_eos") ? options.Get("ignore_eos").ToBoolean() : false;
  params.sparams.grammar = options.Has("grammar")
                               ? options.Get("grammar").ToString().Utf8Value()
                               : "";
  params.n_keep = options.Has("n_keep") ? options.Get("n_keep").ToNumber() : 0;
  params.seed =
      options.Has("seed") ? options.Get("seed").ToNumber() : LLAMA_DEFAULT_SEED;
  std::vector<std::string> stop_words;
  if (options.Has("stop")) {
    auto stop_words_array = options.Get("stop").As<Napi::Array>();
    for (size_t i = 0; i < stop_words_array.Length(); i++) {
      stop_words.push_back(stop_words_array.Get(i).ToString());
    }
  }

  // options.on_sample
  Napi::Function callback;
  if (info.Length() >= 2) {
    callback = info[1].As<Napi::Function>();
  }

  auto worker =
      new LlamaCompletionWorker(info, this, callback, params, stop_words);
  worker->Queue();
  compl_worker = worker;
  return worker->Promise();
}

// stopCompletion(): void
void LlamaContext::StopCompletion(const Napi::CallbackInfo &info) {
  if (compl_worker != nullptr) {
    compl_worker->Stop();
  }
}

// saveSession(path: string): Promise<void> throws error
Napi::Value LlamaContext::SaveSession(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() < 1 || !info[0].IsString()) {
    Napi::TypeError::New(env, "String expected").ThrowAsJavaScriptException();
  }
  auto *worker = new SaveSessionWorker(info, this);
  worker->Queue();
  return worker->Promise();
}

// loadSession(path: string): Promise<{ count }> throws error
Napi::Value LlamaContext::LoadSession(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() < 1 || !info[0].IsString()) {
    Napi::TypeError::New(env, "String expected").ThrowAsJavaScriptException();
  }
  auto *worker = new LoadSessionWorker(info, this);
  worker->Queue();
  return worker->Promise();
}

Napi::Object Init(Napi::Env env, Napi::Object exports) {
  LlamaContext::Export(env, exports);
  return exports;
}

NODE_API_MODULE(addons, Init)
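For orientation, a minimal TypeScript usage sketch of the native API registered above. The constructor options, method names, onToken payload, and result fields are taken from LlamaContext::Export, the constructor, and LlamaCompletionWorker::OnOK in addons.cpp; the import path, model path, and generation settings are placeholders, and the package's actual JavaScript wrapper (lib/index.js) is not shown in this diff.

// usage sketch (illustrative; assumes the addon was loaded as in the earlier loader sketch)
import { LlamaContext } from '../lib/binding';  // hypothetical import path

async function main() {
  // Options mirror the constructor branches above (model, n_ctx, n_gpu_layers, ...)
  const ctx = new LlamaContext({ model: '/path/to/model.gguf', n_ctx: 2048 });
  console.log(ctx.getSystemInfo());

  // completion(options, onToken?) resolves with
  // { tokens_evaluated, tokens_predicted, truncated, text }
  const result = await ctx.completion(
    { prompt: 'Hello,', n_predict: 64, temperature: 0.8, stop: ['\n\n'] },
    (data: { token: string }) => process.stdout.write(data.token),
  );
  console.log('\n---\n' + result.text);

  // Sessions round-trip the evaluated token state via llama_state_{save,load}_file
  await ctx.saveSession('/tmp/llama-session.bin');
  await ctx.loadSession('/tmp/llama-session.bin');
}

main();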