@fugood/llama.node 0.3.7 → 0.3.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -2
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +8 -0
- package/lib/index.js +16 -1
- package/lib/index.ts +16 -0
- package/package.json +1 -1
- package/src/EmbeddingWorker.cpp +4 -3
- package/src/LlamaCompletionWorker.cpp +4 -2
- package/src/LlamaContext.cpp +156 -6
- package/src/LlamaContext.h +5 -0
- package/src/common.hpp +6 -11
- package/src/llama.cpp/.github/workflows/build.yml +19 -17
- package/src/llama.cpp/.github/workflows/docker.yml +77 -30
- package/src/llama.cpp/.github/workflows/editorconfig.yml +3 -1
- package/src/llama.cpp/.github/workflows/server.yml +22 -3
- package/src/llama.cpp/CMakeLists.txt +49 -24
- package/src/llama.cpp/common/arg.cpp +82 -26
- package/src/llama.cpp/common/arg.h +3 -0
- package/src/llama.cpp/common/common.cpp +192 -72
- package/src/llama.cpp/common/common.h +51 -18
- package/src/llama.cpp/common/ngram-cache.cpp +12 -12
- package/src/llama.cpp/common/ngram-cache.h +2 -2
- package/src/llama.cpp/common/sampling.cpp +11 -6
- package/src/llama.cpp/common/speculative.cpp +18 -15
- package/src/llama.cpp/docs/build.md +2 -0
- package/src/llama.cpp/examples/batched/batched.cpp +9 -7
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +3 -3
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +10 -8
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +11 -8
- package/src/llama.cpp/examples/cvector-generator/mean.hpp +1 -1
- package/src/llama.cpp/examples/cvector-generator/pca.hpp +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +8 -7
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +7 -6
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +8 -7
- package/src/llama.cpp/examples/gguf/gguf.cpp +10 -6
- package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +1 -0
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +8 -7
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +13 -10
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +13 -12
- package/src/llama.cpp/examples/infill/infill.cpp +23 -24
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +44 -13
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +11 -6
- package/src/llama.cpp/examples/llava/clip.cpp +4 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +9 -6
- package/src/llama.cpp/examples/llava/llava.cpp +2 -2
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +8 -4
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +11 -8
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +6 -7
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +4 -9
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +3 -7
- package/src/llama.cpp/examples/lookup/lookup.cpp +5 -6
- package/src/llama.cpp/examples/main/main.cpp +51 -29
- package/src/llama.cpp/examples/parallel/parallel.cpp +5 -6
- package/src/llama.cpp/examples/passkey/passkey.cpp +7 -5
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +37 -23
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +12 -14
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +8 -8
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +12 -0
- package/src/llama.cpp/examples/run/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +1351 -0
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +114 -0
- package/src/llama.cpp/examples/run/run.cpp +175 -61
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +4 -25
- package/src/llama.cpp/examples/server/CMakeLists.txt +1 -0
- package/src/llama.cpp/examples/server/httplib.h +1295 -409
- package/src/llama.cpp/examples/server/server.cpp +387 -181
- package/src/llama.cpp/examples/server/tests/requirements.txt +1 -0
- package/src/llama.cpp/examples/server/utils.hpp +170 -58
- package/src/llama.cpp/examples/simple/simple.cpp +9 -8
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +16 -12
- package/src/llama.cpp/examples/speculative/speculative.cpp +22 -23
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +8 -12
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +17 -5
- package/src/llama.cpp/examples/tts/tts.cpp +64 -23
- package/src/llama.cpp/ggml/CMakeLists.txt +5 -21
- package/src/llama.cpp/ggml/include/ggml-backend.h +2 -0
- package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -0
- package/src/llama.cpp/ggml/include/ggml.h +36 -145
- package/src/llama.cpp/ggml/include/gguf.h +202 -0
- package/src/llama.cpp/ggml/src/CMakeLists.txt +6 -3
- package/src/llama.cpp/ggml/src/ggml-alloc.c +5 -0
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +0 -1
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +79 -49
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +5 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +33 -23
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +57 -72
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +87 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +335 -66
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +10 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1090 -378
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +2 -2
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +3 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +3 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +3 -1
- package/src/llama.cpp/ggml/src/ggml-impl.h +11 -16
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +16 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +6 -6
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +154 -35
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +9 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +18 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +40 -95
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +48 -48
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +24 -24
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +238 -164
- package/src/llama.cpp/ggml/src/ggml-sycl/gla.cpp +105 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/gla.hpp +8 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +3 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +7 -5
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +74 -4
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +314 -116
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -2
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +9 -3
- package/src/llama.cpp/ggml/src/ggml.c +117 -1327
- package/src/llama.cpp/ggml/src/gguf.cpp +1329 -0
- package/src/llama.cpp/include/llama-cpp.h +6 -1
- package/src/llama.cpp/include/llama.h +138 -75
- package/src/llama.cpp/src/CMakeLists.txt +13 -1
- package/src/llama.cpp/src/llama-adapter.cpp +347 -0
- package/src/llama.cpp/src/llama-adapter.h +74 -0
- package/src/llama.cpp/src/llama-arch.cpp +1487 -0
- package/src/llama.cpp/src/llama-arch.h +400 -0
- package/src/llama.cpp/src/llama-batch.cpp +368 -0
- package/src/llama.cpp/src/llama-batch.h +88 -0
- package/src/llama.cpp/src/llama-chat.cpp +578 -0
- package/src/llama.cpp/src/llama-chat.h +52 -0
- package/src/llama.cpp/src/llama-context.cpp +1775 -0
- package/src/llama.cpp/src/llama-context.h +128 -0
- package/src/llama.cpp/src/llama-cparams.cpp +1 -0
- package/src/llama.cpp/src/llama-cparams.h +37 -0
- package/src/llama.cpp/src/llama-grammar.cpp +5 -4
- package/src/llama.cpp/src/llama-grammar.h +3 -1
- package/src/llama.cpp/src/llama-hparams.cpp +71 -0
- package/src/llama.cpp/src/llama-hparams.h +139 -0
- package/src/llama.cpp/src/llama-impl.cpp +167 -0
- package/src/llama.cpp/src/llama-impl.h +16 -136
- package/src/llama.cpp/src/llama-kv-cache.cpp +718 -0
- package/src/llama.cpp/src/llama-kv-cache.h +218 -0
- package/src/llama.cpp/src/llama-mmap.cpp +589 -0
- package/src/llama.cpp/src/llama-mmap.h +67 -0
- package/src/llama.cpp/src/llama-model-loader.cpp +1124 -0
- package/src/llama.cpp/src/llama-model-loader.h +167 -0
- package/src/llama.cpp/src/llama-model.cpp +3953 -0
- package/src/llama.cpp/src/llama-model.h +370 -0
- package/src/llama.cpp/src/llama-quant.cpp +934 -0
- package/src/llama.cpp/src/llama-quant.h +1 -0
- package/src/llama.cpp/src/llama-sampling.cpp +147 -32
- package/src/llama.cpp/src/llama-sampling.h +3 -19
- package/src/llama.cpp/src/llama-vocab.cpp +1832 -575
- package/src/llama.cpp/src/llama-vocab.h +97 -142
- package/src/llama.cpp/src/llama.cpp +7160 -20314
- package/src/llama.cpp/src/unicode.cpp +8 -3
- package/src/llama.cpp/tests/CMakeLists.txt +2 -0
- package/src/llama.cpp/tests/test-autorelease.cpp +3 -3
- package/src/llama.cpp/tests/test-backend-ops.cpp +370 -59
- package/src/llama.cpp/tests/test-chat-template.cpp +162 -125
- package/src/llama.cpp/tests/test-gguf.cpp +222 -187
- package/src/llama.cpp/tests/test-model-load-cancel.cpp +1 -1
- package/src/llama.cpp/tests/test-sampling.cpp +0 -1
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +4 -4
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +9 -7
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +8 -6
package/README.md
CHANGED
@@ -4,9 +4,23 @@
 [](https://www.npmjs.com/package/@fugood/llama.node)
 
 
-Node binding of [llama.cpp](https://github.com/ggerganov/llama.cpp).
+An another Node binding of [llama.cpp](https://github.com/ggerganov/llama.cpp) to make same API with [llama.rn](https://github.com/mybigday/llama.rn) as much as possible.
 
-[llama.cpp](https://github.com/ggerganov/llama.cpp): Inference of [LLaMA](https://arxiv.org/abs/2302.13971) model in pure C/C++
+- [llama.cpp](https://github.com/ggerganov/llama.cpp): Inference of [LLaMA](https://arxiv.org/abs/2302.13971) model in pure C/C++
+- [llama.rn](https://github.com/mybigday/llama.rn): React Native binding of llama.cpp
+
+## Platform Support
+
+- macOS
+  - arm64: CPU and Metal GPU acceleration
+  - x86_64: CPU only
+- Windows (x86_64 and arm64)
+  - CPU
+  - GPU acceleration via Vulkan
+- Linux (x86_64 and arm64)
+  - CPU
+  - GPU acceleration via Vulkan
+  - GPU acceleration via CUDA
 
 ## Installation
 
@@ -49,6 +63,7 @@ console.log('Result:', text)
 
 - [x] `default`: General usage, not support GPU except macOS (Metal)
 - [x] `vulkan`: Support GPU Vulkan (Windows/Linux), but some scenario might unstable
+- [x] `cuda`: Support GPU CUDA (Linux), but only for limited capability (x86_64: 8.9, arm64: 8.7)
 
 ## License
 
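For orientation, a minimal sketch of picking one of the build variants described above through the package's public API. It assumes the `loadModel` export and `lib_variant` option declared in `package/lib` (shown later in this diff), and that the model path option is named `model` as in llama.rn; the file path itself is purely illustrative.

```ts
import { loadModel } from '@fugood/llama.node'

const main = async () => {
  // lib_variant selects the prebuilt backend: 'default' | 'vulkan' | 'cuda' (per the README variant list)
  const context = await loadModel({
    model: './models/example.gguf', // placeholder path
    lib_variant: 'vulkan',
  })
  await context.release()
}

main()
```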
package/bin/** (all prebuilt llama-node.node and node.lib files listed above)
CHANGED
Binary files; contents not shown.
package/lib/binding.ts
CHANGED
@@ -37,6 +37,9 @@ export type LlamaModelOptions = {
   use_mlock?: boolean
   use_mmap?: boolean
   vocab_only?: boolean
+  lora?: string
+  lora_scaled?: number
+  lora_list?: { path: string; scaled: number }[]
 }
 
 export type LlamaCompletionOptions = {
@@ -111,6 +114,11 @@ export interface LlamaContext {
   saveSession(path: string): Promise<void>
   loadSession(path: string): Promise<void>
   release(): Promise<void>
+  applyLoraAdapters(adapters: { path: string; scaled: number }[]): void
+  removeLoraAdapters(adapters: { path: string }[]): void
+  getLoadedLoraAdapters(): { path: string; scaled: number }[]
+  // static
+  loadModelInfo(path: string, skip: string[]): Promise<Object>
 }
 
 export interface Module {
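A hedged usage sketch of the new LoRA fields on `LlamaModelOptions`. It assumes `loadModel` from `lib/index.ts` and a `model` path option; the GGUF and adapter file names are placeholders.

```ts
import { loadModel } from '@fugood/llama.node'

// Apply adapters at load time; a single adapter can also be passed via
// `lora` / `lora_scaled` instead of `lora_list`.
const context = await loadModel({
  model: './base-model.gguf', // placeholder path
  lora_list: [
    { path: './adapter-a.gguf', scaled: 1.0 }, // placeholder adapters
    { path: './adapter-b.gguf', scaled: 0.5 },
  ],
})
```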
package/lib/index.js
CHANGED
@@ -23,7 +23,7 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
 });
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.loadModel = void 0;
+exports.loadLlamaModelInfo = exports.initLlama = exports.loadModel = void 0;
 const binding_1 = require("./binding");
 __exportStar(require("./binding"), exports);
 const mods = {};
@@ -34,3 +34,18 @@ const loadModel = (options) => __awaiter(void 0, void 0, void 0, function* () {
     return new mods[variant].LlamaContext(options);
 });
 exports.loadModel = loadModel;
+exports.initLlama = binding_1.loadModule;
+const modelInfoSkip = [
+    // Large fields
+    'tokenizer.ggml.tokens',
+    'tokenizer.ggml.token_type',
+    'tokenizer.ggml.merges',
+    'tokenizer.ggml.scores',
+];
+const loadLlamaModelInfo = (path) => __awaiter(void 0, void 0, void 0, function* () {
+    var _a;
+    const variant = 'default';
+    (_a = mods[variant]) !== null && _a !== void 0 ? _a : (mods[variant] = yield (0, binding_1.loadModule)(variant));
+    return mods[variant].LlamaContext.loadModelInfo(path, modelInfoSkip);
+});
+exports.loadLlamaModelInfo = loadLlamaModelInfo;
package/lib/index.ts
CHANGED
@@ -14,3 +14,19 @@ export const loadModel = async (options: LlamaModelOptionsExtended): Promise<Lla
   mods[variant] ??= await loadModule(options.lib_variant)
   return new mods[variant].LlamaContext(options)
 }
+
+export const initLlama = loadModule
+
+const modelInfoSkip = [
+  // Large fields
+  'tokenizer.ggml.tokens',
+  'tokenizer.ggml.token_type',
+  'tokenizer.ggml.merges',
+  'tokenizer.ggml.scores',
+]
+
+export const loadLlamaModelInfo = async (path: string): Promise<Object> => {
+  const variant = 'default'
+  mods[variant] ??= await loadModule(variant)
+  return mods[variant].LlamaContext.loadModelInfo(path, modelInfoSkip)
+}
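A short sketch of the new `loadLlamaModelInfo` export, assuming the package re-exports it as shown above; the path is a placeholder and the printed keys are only examples of typical GGUF metadata.

```ts
import { loadLlamaModelInfo } from '@fugood/llama.node'

// Reads GGUF metadata via the static LlamaContext.loadModelInfo binding,
// with the large tokenizer fields skipped (see modelInfoSkip above).
const info = await loadLlamaModelInfo('./models/example.gguf') // placeholder path
console.log(info) // e.g. keys like 'version', 'general.architecture', ...
```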
package/package.json
CHANGED
package/src/EmbeddingWorker.cpp
CHANGED
@@ -9,10 +9,11 @@ void EmbeddingWorker::Execute() {
   llama_kv_cache_clear(_sess->context());
   auto tokens = ::common_tokenize(_sess->context(), _text, true);
   // add SEP if not present
-
-
+  auto vocab = llama_model_get_vocab(_sess->model());
+  if (tokens.empty() || tokens.back() != llama_vocab_sep(vocab)) {
+    tokens.push_back(llama_vocab_sep(vocab));
   }
-  const int n_embd =
+  const int n_embd = llama_model_n_embd(_sess->model());
   do {
     auto ctx = _sess->context();
     int ret =

package/src/LlamaCompletionWorker.cpp
CHANGED
@@ -59,7 +59,9 @@ void LlamaCompletionWorker::Execute() {
   size_t n_cur = 0;
   size_t n_input = 0;
   const auto model = _sess->model();
-
+  auto vocab = llama_model_get_vocab(model);
+
+  const bool add_bos = llama_vocab_get_add_bos(vocab);
   auto ctx = _sess->context();
 
   auto sparams = llama_sampler_chain_default_params();
@@ -130,7 +132,7 @@ void LlamaCompletionWorker::Execute() {
     });
   }
   // is it an end of generation?
-  if (
+  if (llama_vocab_is_eog(vocab, new_token_id)) {
     break;
   }
   // check for stop words
package/src/LlamaContext.cpp
CHANGED
@@ -1,4 +1,6 @@
 #include "ggml.h"
+#include "gguf.h"
+#include "llama-impl.h"
 #include "LlamaContext.h"
 #include "DetokenizeWorker.h"
 #include "DisposeWorker.h"
@@ -8,6 +10,56 @@
 #include "SaveSessionWorker.h"
 #include "TokenizeWorker.h"
 
+// loadModelInfo(path: string): object
+Napi::Value LlamaContext::ModelInfo(const Napi::CallbackInfo& info) {
+  Napi::Env env = info.Env();
+  struct gguf_init_params params = {
+    /*.no_alloc = */ false,
+    /*.ctx      = */ NULL,
+  };
+  std::string path = info[0].ToString().Utf8Value();
+
+  // Convert Napi::Array to vector<string>
+  std::vector<std::string> skip;
+  if (info.Length() > 1 && info[1].IsArray()) {
+    Napi::Array skipArray = info[1].As<Napi::Array>();
+    for (uint32_t i = 0; i < skipArray.Length(); i++) {
+      skip.push_back(skipArray.Get(i).ToString().Utf8Value());
+    }
+  }
+
+  struct gguf_context * ctx = gguf_init_from_file(path.c_str(), params);
+
+  Napi::Object metadata = Napi::Object::New(env);
+  if (std::find(skip.begin(), skip.end(), "version") == skip.end()) {
+    metadata.Set("version", Napi::Number::New(env, gguf_get_version(ctx)));
+  }
+  if (std::find(skip.begin(), skip.end(), "alignment") == skip.end()) {
+    metadata.Set("alignment", Napi::Number::New(env, gguf_get_alignment(ctx)));
+  }
+  if (std::find(skip.begin(), skip.end(), "data_offset") == skip.end()) {
+    metadata.Set("data_offset", Napi::Number::New(env, gguf_get_data_offset(ctx)));
+  }
+
+  // kv
+  {
+    const int n_kv = gguf_get_n_kv(ctx);
+
+    for (int i = 0; i < n_kv; ++i) {
+      const char * key = gguf_get_key(ctx, i);
+      if (std::find(skip.begin(), skip.end(), key) != skip.end()) {
+        continue;
+      }
+      const std::string value = gguf_kv_to_str(ctx, i);
+      metadata.Set(key, Napi::String::New(env, value.c_str()));
+    }
+  }
+
+  gguf_free(ctx);
+
+  return metadata;
+}
+
 std::vector<common_chat_msg> get_messages(Napi::Array messages) {
   std::vector<common_chat_msg> chat;
   for (size_t i = 0; i < messages.Length(); i++) {
@@ -51,8 +103,20 @@ void LlamaContext::Init(Napi::Env env, Napi::Object &exports) {
        InstanceMethod<&LlamaContext::LoadSession>(
            "loadSession",
            static_cast<napi_property_attributes>(napi_enumerable)),
+       InstanceMethod<&LlamaContext::ApplyLoraAdapters>(
+           "applyLoraAdapters",
+           static_cast<napi_property_attributes>(napi_enumerable)),
+       InstanceMethod<&LlamaContext::RemoveLoraAdapters>(
+           "removeLoraAdapters",
+           static_cast<napi_property_attributes>(napi_enumerable)),
+       InstanceMethod<&LlamaContext::GetLoadedLoraAdapters>(
+           "getLoadedLoraAdapters",
+           static_cast<napi_property_attributes>(napi_enumerable)),
        InstanceMethod<&LlamaContext::Release>(
-           "release", static_cast<napi_property_attributes>(napi_enumerable))
+           "release", static_cast<napi_property_attributes>(napi_enumerable)),
+       StaticMethod<&LlamaContext::ModelInfo>(
+           "loadModelInfo",
+           static_cast<napi_property_attributes>(napi_enumerable))});
   Napi::FunctionReference *constructor = new Napi::FunctionReference();
   *constructor = Napi::Persistent(func);
 #if NAPI_VERSION > 5
@@ -140,14 +204,56 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   llama_backend_init();
   llama_numa_init(params.numa);
 
-  auto
+  auto sess = std::make_shared<LlamaSession>(params);
 
-  if (
+  if (sess->model() == nullptr || sess->context() == nullptr) {
     Napi::TypeError::New(env, "Failed to load model")
         .ThrowAsJavaScriptException();
   }
 
-
+  auto ctx = sess->context();
+  auto model = sess->model();
+
+  std::vector<common_adapter_lora_info> lora;
+  auto lora_path = get_option<std::string>(options, "lora", "");
+  auto lora_scaled = get_option<float>(options, "lora_scaled", 1.0f);
+  if (lora_path != "") {
+    common_adapter_lora_info la;
+    la.path = lora_path;
+    la.scale = lora_scaled;
+    la.ptr = llama_adapter_lora_init(model, lora_path.c_str());
+    if (la.ptr == nullptr) {
+      Napi::TypeError::New(env, "Failed to load lora adapter")
+          .ThrowAsJavaScriptException();
+    }
+    lora.push_back(la);
+  }
+
+  if (options.Has("lora_list") && options.Get("lora_list").IsArray()) {
+    auto lora_list = options.Get("lora_list").As<Napi::Array>();
+    if (lora_list != nullptr) {
+      int lora_list_size = lora_list.Length();
+      for (int i = 0; i < lora_list_size; i++) {
+        auto lora_adapter = lora_list.Get(i).As<Napi::Object>();
+        auto path = lora_adapter.Get("path").ToString();
+        if (path != nullptr) {
+          common_adapter_lora_info la;
+          la.path = path;
+          la.scale = lora_adapter.Get("scaled").ToNumber().FloatValue();
+          la.ptr = llama_adapter_lora_init(model, path.Utf8Value().c_str());
+          if (la.ptr == nullptr) {
+            Napi::TypeError::New(env, "Failed to load lora adapter")
+                .ThrowAsJavaScriptException();
+          }
+          lora.push_back(la);
+        }
+      }
+    }
+  }
+  common_set_adapter_lora(ctx, lora);
+  _lora = lora;
+
+  _sess = sess;
   _info = common_params_get_system_info(params);
 }
 
@@ -162,8 +268,8 @@ bool validateModelChatTemplate(const struct llama_model * model) {
   int32_t res = llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
   if (res >= 0) {
     llama_chat_message chat[] = {{"user", "test"}};
-
-    int32_t chat_res = llama_chat_apply_template(
+    const char * tmpl = llama_model_chat_template(model);
+    int32_t chat_res = llama_chat_apply_template(tmpl, chat, 1, true, nullptr, 0);
     return chat_res > 0;
   }
   return res > 0;
@@ -187,6 +293,7 @@ Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
   }
   Napi::Object details = Napi::Object::New(info.Env());
   details.Set("desc", desc);
+  details.Set("nEmbd", llama_model_n_embd(model));
   details.Set("nParams", llama_model_n_params(model));
   details.Set("size", llama_model_size(model));
   details.Set("isChatTemplateSupported", validateModelChatTemplate(model));
@@ -396,6 +503,49 @@ Napi::Value LlamaContext::LoadSession(const Napi::CallbackInfo &info) {
   return worker->Promise();
 }
 
+// applyLoraAdapters(lora_adapters: [{ path: string, scaled: number }]): void
+void LlamaContext::ApplyLoraAdapters(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
+  std::vector<common_adapter_lora_info> lora;
+  auto lora_adapters = info[0].As<Napi::Array>();
+  for (size_t i = 0; i < lora_adapters.Length(); i++) {
+    auto lora_adapter = lora_adapters.Get(i).As<Napi::Object>();
+    auto path = lora_adapter.Get("path").ToString().Utf8Value();
+    auto scaled = lora_adapter.Get("scaled").ToNumber().FloatValue();
+    common_adapter_lora_info la;
+    la.path = path;
+    la.scale = scaled;
+    la.ptr = llama_adapter_lora_init(_sess->model(), path.c_str());
+    if (la.ptr == nullptr) {
+      Napi::TypeError::New(env, "Failed to load lora adapter")
+          .ThrowAsJavaScriptException();
+    }
+    lora.push_back(la);
+  }
+  common_set_adapter_lora(_sess->context(), lora);
+  _lora = lora;
+}
+
+// removeLoraAdapters(): void
+void LlamaContext::RemoveLoraAdapters(const Napi::CallbackInfo &info) {
+  _lora.clear();
+  common_set_adapter_lora(_sess->context(), _lora);
+}
+
+// getLoadedLoraAdapters(): Promise<{ count, lora_adapters: [{ path: string,
+// scaled: number }] }>
+Napi::Value LlamaContext::GetLoadedLoraAdapters(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
+  Napi::Array lora_adapters = Napi::Array::New(env, _lora.size());
+  for (size_t i = 0; i < _lora.size(); i++) {
+    Napi::Object lora_adapter = Napi::Object::New(env);
+    lora_adapter.Set("path", _lora[i].path);
+    lora_adapter.Set("scaled", _lora[i].scale);
+    lora_adapters.Set(i, lora_adapter);
+  }
+  return lora_adapters;
+}
+
 // release(): Promise<void>
 Napi::Value LlamaContext::Release(const Napi::CallbackInfo &info) {
   auto env = info.Env();
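The three new instance methods above can also be driven from JavaScript after the context is created; a hedged sketch of hot-swapping adapters on a live context (method names per `lib/binding.ts`, file paths are placeholders):

```ts
import { loadModel } from '@fugood/llama.node'

const context = await loadModel({ model: './base-model.gguf' }) // placeholder path

// Load and apply an adapter on the existing context
context.applyLoraAdapters([{ path: './adapter-a.gguf', scaled: 0.75 }])

// Inspect what is currently attached
console.log(context.getLoadedLoraAdapters()) // [{ path: './adapter-a.gguf', scaled: 0.75 }]

// The native RemoveLoraAdapters clears every loaded adapter regardless of the argument
context.removeLoraAdapters([])
```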
package/src/LlamaContext.h
CHANGED
@@ -5,6 +5,7 @@ class LlamaCompletionWorker;
 class LlamaContext : public Napi::ObjectWrap<LlamaContext> {
 public:
   LlamaContext(const Napi::CallbackInfo &info);
+  static Napi::Value ModelInfo(const Napi::CallbackInfo& info);
   static void Init(Napi::Env env, Napi::Object &exports);
 
 private:
@@ -18,10 +19,14 @@ private:
   Napi::Value Embedding(const Napi::CallbackInfo &info);
   Napi::Value SaveSession(const Napi::CallbackInfo &info);
   Napi::Value LoadSession(const Napi::CallbackInfo &info);
+  void ApplyLoraAdapters(const Napi::CallbackInfo &info);
+  void RemoveLoraAdapters(const Napi::CallbackInfo &info);
+  Napi::Value GetLoadedLoraAdapters(const Napi::CallbackInfo &info);
   Napi::Value Release(const Napi::CallbackInfo &info);
 
   std::string _info;
   Napi::Object _meta;
   LlamaSessionPtr _sess = nullptr;
+  std::vector<common_adapter_lora_info> _lora;
   LlamaCompletionWorker *_wip = nullptr;
 };
package/src/common.hpp
CHANGED
@@ -11,8 +11,6 @@
 #include <tuple>
 #include <vector>
 
-typedef std::unique_ptr<llama_model, decltype(&llama_free_model)> LlamaCppModel;
-typedef std::unique_ptr<llama_context, decltype(&llama_free)> LlamaCppContext;
 typedef std::unique_ptr<common_sampler, decltype(&common_sampler_free)>
     LlamaCppSampling;
 typedef std::unique_ptr<llama_batch, decltype(&llama_batch_free)> LlamaCppBatch;
@@ -47,17 +45,17 @@ constexpr T get_option(const Napi::Object &options, const std::string &name,
 
 class LlamaSession {
 public:
-  LlamaSession(
-      :
-
+  LlamaSession(common_params params)
+      : params_(params) {
+    llama_init_ = common_init_from_params(params);
     tokens_.reserve(params.n_ctx);
   }
 
   ~LlamaSession() { dispose(); }
 
-  inline llama_context *context() { return
+  inline llama_context *context() { return llama_init_.context.get(); }
 
-  inline llama_model *model() { return
+  inline llama_model *model() { return llama_init_.model.get(); }
 
   inline std::vector<llama_token> *tokens_ptr() { return &tokens_; }
 
@@ -72,13 +70,10 @@ public:
   void dispose() {
     std::lock_guard<std::mutex> lock(mutex);
     tokens_.clear();
-    ctx_.reset();
-    model_.reset();
   }
 
 private:
-
-  LlamaCppContext ctx_;
+  common_init_result llama_init_;
   const common_params params_;
   std::vector<llama_token> tokens_{};
   std::mutex mutex;
package/src/llama.cpp/.github/workflows/build.yml
CHANGED
@@ -60,8 +60,7 @@ jobs:
 -DLLAMA_CURL=ON \
 -DGGML_METAL_USE_BF16=ON \
 -DGGML_METAL_EMBED_LIBRARY=ON \
--DGGML_RPC=ON
--DBUILD_SHARED_LIBS=OFF
+-DGGML_RPC=ON
 cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
 
 - name: Test
@@ -88,6 +87,7 @@ jobs:
 if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
 run: |
 cp LICENSE ./build/bin/
+cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp
 zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip ./build/bin/*
 
 - name: Upload artifacts
@@ -123,8 +123,7 @@ jobs:
 -DLLAMA_FATAL_WARNINGS=ON \
 -DLLAMA_CURL=ON \
 -DGGML_METAL=OFF \
--DGGML_RPC=ON
--DBUILD_SHARED_LIBS=OFF
+-DGGML_RPC=ON
 cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
 
 - name: Test
@@ -151,6 +150,7 @@ jobs:
 if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
 run: |
 cp LICENSE ./build/bin/
+cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp
 zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip ./build/bin/*
 
 - name: Upload artifacts
@@ -181,7 +181,7 @@ jobs:
 run: |
 mkdir build
 cd build
-cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON -DGGML_RPC=ON
+cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON -DGGML_RPC=ON
 cmake --build . --config Release -j $(nproc)
 
 - name: Test
@@ -219,6 +219,7 @@ jobs:
 if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
 run: |
 cp LICENSE ./build/bin/
+cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp
 zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip ./build/bin/*
 
 - name: Upload artifacts
@@ -236,7 +237,7 @@ jobs:
 strategy:
 matrix:
 sanitizer: [ADDRESS, THREAD, UNDEFINED]
-build_type: [Debug
+build_type: [Debug]
 
 steps:
 - name: Clone
@@ -651,23 +652,23 @@ jobs:
 matrix:
 include:
 - build: 'noavx-x64'
-  defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF
+  defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF'
 - build: 'avx2-x64'
-  defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON
+  defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON'
 - build: 'avx-x64'
-  defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX2=OFF
+  defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX2=OFF'
 - build: 'avx512-x64'
-  defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX512=ON
+  defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX512=ON'
 - build: 'openblas-x64'
-  defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BLAS=ON -
+  defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
 - build: 'kompute-x64'
-  defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON
+  defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON'
 - build: 'vulkan-x64'
-  defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_VULKAN=ON
+  defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_VULKAN=ON'
 - build: 'llvm-arm64'
-  defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON
+  defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
 - build: 'msvc-arm64'
-  defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-msvc.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON
+  defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-msvc.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
 - build: 'llvm-arm64-opencl-adreno'
   defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON'
 
@@ -798,6 +799,7 @@ jobs:
 if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
 run: |
 Copy-Item LICENSE .\build\bin\Release\llama.cpp.txt
+Copy-Item .\examples\run\linenoise.cpp\LICENSE .\build\bin\Release\linenoise.cpp.txt
 7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip .\build\bin\Release\*
 
 - name: Upload artifacts
@@ -914,7 +916,7 @@ jobs:
 shell: cmd
 run: |
 call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
-cmake -S . -B build -G "Ninja Multi-Config" -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -
+cmake -S . -B build -G "Ninja Multi-Config" -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DGGML_RPC=ON
 set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
 cmake --build build --config Release -j %NINJA_JOBS% -t ggml
 cmake --build build --config Release
@@ -1239,7 +1241,7 @@ jobs:
 
 - name: Create release
   id: create_release
-  uses:
+  uses: ggml-org/action-create-release@v1
   env:
     GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
   with: