@fugood/llama.node 0.3.6 → 0.3.8
This diff shows the changes between publicly released versions of the package as they appear in the supported public registries. It is provided for informational purposes only.
- package/README.md +17 -2
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +3 -1
- package/lib/index.js +16 -1
- package/lib/index.ts +16 -0
- package/package.json +1 -1
- package/src/EmbeddingWorker.cpp +4 -3
- package/src/LlamaCompletionWorker.cpp +4 -2
- package/src/LlamaContext.cpp +61 -6
- package/src/LlamaContext.h +1 -0
- package/src/common.hpp +6 -11
- package/src/llama.cpp/.github/workflows/build.yml +19 -17
- package/src/llama.cpp/.github/workflows/docker.yml +77 -30
- package/src/llama.cpp/.github/workflows/editorconfig.yml +3 -1
- package/src/llama.cpp/.github/workflows/server.yml +22 -3
- package/src/llama.cpp/CMakeLists.txt +49 -24
- package/src/llama.cpp/common/arg.cpp +82 -26
- package/src/llama.cpp/common/arg.h +3 -0
- package/src/llama.cpp/common/common.cpp +192 -72
- package/src/llama.cpp/common/common.h +51 -18
- package/src/llama.cpp/common/ngram-cache.cpp +12 -12
- package/src/llama.cpp/common/ngram-cache.h +2 -2
- package/src/llama.cpp/common/sampling.cpp +11 -6
- package/src/llama.cpp/common/speculative.cpp +18 -15
- package/src/llama.cpp/docs/build.md +2 -0
- package/src/llama.cpp/examples/batched/batched.cpp +9 -7
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +3 -3
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +10 -8
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +11 -8
- package/src/llama.cpp/examples/cvector-generator/mean.hpp +1 -1
- package/src/llama.cpp/examples/cvector-generator/pca.hpp +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +8 -7
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +7 -6
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +8 -7
- package/src/llama.cpp/examples/gguf/gguf.cpp +10 -6
- package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +1 -0
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +8 -7
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +13 -10
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +13 -12
- package/src/llama.cpp/examples/infill/infill.cpp +23 -24
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +44 -13
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +11 -6
- package/src/llama.cpp/examples/llava/clip.cpp +4 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +9 -6
- package/src/llama.cpp/examples/llava/llava.cpp +2 -2
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +8 -4
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +11 -8
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +6 -7
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +4 -9
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +3 -7
- package/src/llama.cpp/examples/lookup/lookup.cpp +5 -6
- package/src/llama.cpp/examples/main/main.cpp +51 -29
- package/src/llama.cpp/examples/parallel/parallel.cpp +5 -6
- package/src/llama.cpp/examples/passkey/passkey.cpp +7 -5
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +37 -23
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +12 -14
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +8 -8
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +12 -0
- package/src/llama.cpp/examples/run/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +1351 -0
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +114 -0
- package/src/llama.cpp/examples/run/run.cpp +175 -61
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +4 -25
- package/src/llama.cpp/examples/server/CMakeLists.txt +1 -0
- package/src/llama.cpp/examples/server/httplib.h +1295 -409
- package/src/llama.cpp/examples/server/server.cpp +387 -181
- package/src/llama.cpp/examples/server/tests/requirements.txt +1 -0
- package/src/llama.cpp/examples/server/utils.hpp +170 -58
- package/src/llama.cpp/examples/simple/simple.cpp +9 -8
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +16 -12
- package/src/llama.cpp/examples/speculative/speculative.cpp +22 -23
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +8 -12
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +17 -5
- package/src/llama.cpp/examples/tts/tts.cpp +64 -23
- package/src/llama.cpp/ggml/CMakeLists.txt +5 -21
- package/src/llama.cpp/ggml/include/ggml-backend.h +2 -0
- package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -0
- package/src/llama.cpp/ggml/include/ggml.h +36 -145
- package/src/llama.cpp/ggml/include/gguf.h +202 -0
- package/src/llama.cpp/ggml/src/CMakeLists.txt +6 -3
- package/src/llama.cpp/ggml/src/ggml-alloc.c +5 -0
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +0 -1
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +79 -49
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +5 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +33 -23
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +57 -72
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +87 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +335 -66
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +10 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1090 -378
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +2 -2
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +3 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +3 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +3 -1
- package/src/llama.cpp/ggml/src/ggml-impl.h +11 -16
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +16 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +6 -6
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +154 -35
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +9 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +18 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +40 -95
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +48 -48
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +24 -24
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +238 -164
- package/src/llama.cpp/ggml/src/ggml-sycl/gla.cpp +105 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/gla.hpp +8 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +3 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +7 -5
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +74 -4
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +314 -116
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -2
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +9 -3
- package/src/llama.cpp/ggml/src/ggml.c +117 -1327
- package/src/llama.cpp/ggml/src/gguf.cpp +1329 -0
- package/src/llama.cpp/include/llama-cpp.h +6 -1
- package/src/llama.cpp/include/llama.h +138 -75
- package/src/llama.cpp/src/CMakeLists.txt +13 -1
- package/src/llama.cpp/src/llama-adapter.cpp +347 -0
- package/src/llama.cpp/src/llama-adapter.h +74 -0
- package/src/llama.cpp/src/llama-arch.cpp +1487 -0
- package/src/llama.cpp/src/llama-arch.h +400 -0
- package/src/llama.cpp/src/llama-batch.cpp +368 -0
- package/src/llama.cpp/src/llama-batch.h +88 -0
- package/src/llama.cpp/src/llama-chat.cpp +578 -0
- package/src/llama.cpp/src/llama-chat.h +52 -0
- package/src/llama.cpp/src/llama-context.cpp +1775 -0
- package/src/llama.cpp/src/llama-context.h +128 -0
- package/src/llama.cpp/src/llama-cparams.cpp +1 -0
- package/src/llama.cpp/src/llama-cparams.h +37 -0
- package/src/llama.cpp/src/llama-grammar.cpp +5 -4
- package/src/llama.cpp/src/llama-grammar.h +3 -1
- package/src/llama.cpp/src/llama-hparams.cpp +71 -0
- package/src/llama.cpp/src/llama-hparams.h +139 -0
- package/src/llama.cpp/src/llama-impl.cpp +167 -0
- package/src/llama.cpp/src/llama-impl.h +16 -136
- package/src/llama.cpp/src/llama-kv-cache.cpp +718 -0
- package/src/llama.cpp/src/llama-kv-cache.h +218 -0
- package/src/llama.cpp/src/llama-mmap.cpp +589 -0
- package/src/llama.cpp/src/llama-mmap.h +67 -0
- package/src/llama.cpp/src/llama-model-loader.cpp +1124 -0
- package/src/llama.cpp/src/llama-model-loader.h +167 -0
- package/src/llama.cpp/src/llama-model.cpp +3953 -0
- package/src/llama.cpp/src/llama-model.h +370 -0
- package/src/llama.cpp/src/llama-quant.cpp +934 -0
- package/src/llama.cpp/src/llama-quant.h +1 -0
- package/src/llama.cpp/src/llama-sampling.cpp +147 -32
- package/src/llama.cpp/src/llama-sampling.h +3 -19
- package/src/llama.cpp/src/llama-vocab.cpp +1832 -575
- package/src/llama.cpp/src/llama-vocab.h +97 -142
- package/src/llama.cpp/src/llama.cpp +7160 -20314
- package/src/llama.cpp/src/unicode.cpp +8 -3
- package/src/llama.cpp/tests/CMakeLists.txt +2 -0
- package/src/llama.cpp/tests/test-autorelease.cpp +3 -3
- package/src/llama.cpp/tests/test-backend-ops.cpp +370 -59
- package/src/llama.cpp/tests/test-chat-template.cpp +162 -125
- package/src/llama.cpp/tests/test-gguf.cpp +222 -187
- package/src/llama.cpp/tests/test-model-load-cancel.cpp +1 -1
- package/src/llama.cpp/tests/test-sampling.cpp +0 -1
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +4 -4
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +9 -7
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +8 -6
package/src/llama.cpp/src/llama-impl.h

```diff
@@ -1,10 +1,9 @@
 #pragma once
 
-#include "
+#include "ggml.h" // for ggml_log_level
 
 #include <string>
 #include <vector>
-#include <stdexcept>
 
 #ifdef __GNUC__
 #ifdef __MINGW32__
@@ -35,147 +34,28 @@ void llama_log_callback_default(ggml_log_level level, const char * text, void *
 // helpers
 //
 
-
-
+template <typename T>
+struct no_init {
+    T value;
+    no_init() { /* do nothing */ }
+};
 
-
-
-
-        }
-    }
+struct time_meas {
+    time_meas(int64_t & t_acc, bool disable = false);
+    ~time_meas();
 
     const int64_t t_start_us;
 
     int64_t & t_acc;
 };
 
-
-    if (search.empty()) {
-        return;
-    }
-    std::string builder;
-    builder.reserve(s.length());
-    size_t pos = 0;
-    size_t last_pos = 0;
-    while ((pos = s.find(search, last_pos)) != std::string::npos) {
-        builder.append(s, last_pos, pos - last_pos);
-        builder.append(replace);
-        last_pos = pos + search.length();
-    }
-    builder.append(s, last_pos, std::string::npos);
-    s = std::move(builder);
-}
-
-const std::vector<std::pair<std::string, struct ggml_tensor *>> & llama_internal_get_tensor_map(
-    struct llama_context * ctx
-);
-
-// the ring buffer works similarly to std::deque, but with a fixed capacity
-template<typename T>
-struct ring_buffer {
-    ring_buffer(size_t cap) : capacity(cap), data(cap) {}
-
-    T & front() {
-        if (sz == 0) {
-            throw std::runtime_error("ring buffer is empty");
-        }
-        return data[first];
-    }
-
-    const T & front() const {
-        if (sz == 0) {
-            throw std::runtime_error("ring buffer is empty");
-        }
-        return data[first];
-    }
-
-    T & back() {
-        if (sz == 0) {
-            throw std::runtime_error("ring buffer is empty");
-        }
-        return data[pos];
-    }
-
-    const T & back() const {
-        if (sz == 0) {
-            throw std::runtime_error("ring buffer is empty");
-        }
-        return data[pos];
-    }
+void replace_all(std::string & s, const std::string & search, const std::string & replace);
 
-
-
-
-        }
+// TODO: rename to llama_format ?
+LLAMA_ATTRIBUTE_FORMAT(1, 2)
+std::string format(const char * fmt, ...);
 
-
-
-            first = (first + 1) % capacity;
-        } else {
-            sz++;
-        }
-        data[pos] = value;
-        pos = (pos + 1) % capacity;
-    }
+std::string llama_format_tensor_shape(const std::vector<int64_t> & ne);
+std::string llama_format_tensor_shape(const struct ggml_tensor * t);
 
-
-        if (sz == 0) {
-            throw std::runtime_error("ring buffer is empty");
-        }
-        T value = data[first];
-        first = (first + 1) % capacity;
-        sz--;
-        return value;
-    }
-
-    //T & operator[](size_t i) {
-    //    if (i >= sz) {
-    //        throw std::runtime_error("ring buffer: index out of bounds");
-    //    }
-    //    return data[(first + i) % capacity];
-    //}
-
-    //const T & at(size_t i) const {
-    //    if (i >= sz) {
-    //        throw std::runtime_error("ring buffer: index out of bounds");
-    //    }
-    //    return data[(first + i) % capacity];
-    //}
-
-    const T & rat(size_t i) const {
-        if (i >= sz) {
-            throw std::runtime_error("ring buffer: index out of bounds");
-        }
-        return data[(first + sz - i - 1) % capacity];
-    }
-
-    std::vector<T> to_vector() const {
-        std::vector<T> result;
-        result.reserve(sz);
-        for (size_t i = 0; i < sz; i++) {
-            result.push_back(data[(first + i) % capacity]);
-        }
-        return result;
-    }
-
-    void clear() {
-        // here only reset the status of the buffer
-        sz = 0;
-        first = 0;
-        pos = 0;
-    }
-
-    bool empty() const {
-        return sz == 0;
-    }
-
-    size_t size() const {
-        return sz;
-    }
-
-    size_t capacity = 0;
-    size_t sz = 0;
-    size_t first = 0;
-    size_t pos = 0;
-    std::vector<T> data;
-};
+std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i);
```