@fugood/llama.node 0.3.7 → 0.3.8
This diff compares the contents of publicly available package versions as published to a supported public registry. It is provided for informational purposes only and reflects the changes between package versions exactly as they appear in their public registries.
- package/README.md +17 -2
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +2 -0
- package/lib/index.js +16 -1
- package/lib/index.ts +16 -0
- package/package.json +1 -1
- package/src/EmbeddingWorker.cpp +4 -3
- package/src/LlamaCompletionWorker.cpp +4 -2
- package/src/LlamaContext.cpp +61 -6
- package/src/LlamaContext.h +1 -0
- package/src/common.hpp +6 -11
- package/src/llama.cpp/.github/workflows/build.yml +19 -17
- package/src/llama.cpp/.github/workflows/docker.yml +77 -30
- package/src/llama.cpp/.github/workflows/editorconfig.yml +3 -1
- package/src/llama.cpp/.github/workflows/server.yml +22 -3
- package/src/llama.cpp/CMakeLists.txt +49 -24
- package/src/llama.cpp/common/arg.cpp +82 -26
- package/src/llama.cpp/common/arg.h +3 -0
- package/src/llama.cpp/common/common.cpp +192 -72
- package/src/llama.cpp/common/common.h +51 -18
- package/src/llama.cpp/common/ngram-cache.cpp +12 -12
- package/src/llama.cpp/common/ngram-cache.h +2 -2
- package/src/llama.cpp/common/sampling.cpp +11 -6
- package/src/llama.cpp/common/speculative.cpp +18 -15
- package/src/llama.cpp/docs/build.md +2 -0
- package/src/llama.cpp/examples/batched/batched.cpp +9 -7
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +3 -3
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +10 -8
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +11 -8
- package/src/llama.cpp/examples/cvector-generator/mean.hpp +1 -1
- package/src/llama.cpp/examples/cvector-generator/pca.hpp +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +8 -7
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +7 -6
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +8 -7
- package/src/llama.cpp/examples/gguf/gguf.cpp +10 -6
- package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +1 -0
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +8 -7
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +13 -10
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +13 -12
- package/src/llama.cpp/examples/infill/infill.cpp +23 -24
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +44 -13
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +11 -6
- package/src/llama.cpp/examples/llava/clip.cpp +4 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +9 -6
- package/src/llama.cpp/examples/llava/llava.cpp +2 -2
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +8 -4
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +11 -8
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +6 -7
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +4 -9
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +3 -7
- package/src/llama.cpp/examples/lookup/lookup.cpp +5 -6
- package/src/llama.cpp/examples/main/main.cpp +51 -29
- package/src/llama.cpp/examples/parallel/parallel.cpp +5 -6
- package/src/llama.cpp/examples/passkey/passkey.cpp +7 -5
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +37 -23
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +12 -14
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +8 -8
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +12 -0
- package/src/llama.cpp/examples/run/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +1351 -0
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +114 -0
- package/src/llama.cpp/examples/run/run.cpp +175 -61
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +4 -25
- package/src/llama.cpp/examples/server/CMakeLists.txt +1 -0
- package/src/llama.cpp/examples/server/httplib.h +1295 -409
- package/src/llama.cpp/examples/server/server.cpp +387 -181
- package/src/llama.cpp/examples/server/tests/requirements.txt +1 -0
- package/src/llama.cpp/examples/server/utils.hpp +170 -58
- package/src/llama.cpp/examples/simple/simple.cpp +9 -8
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +16 -12
- package/src/llama.cpp/examples/speculative/speculative.cpp +22 -23
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +8 -12
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +17 -5
- package/src/llama.cpp/examples/tts/tts.cpp +64 -23
- package/src/llama.cpp/ggml/CMakeLists.txt +5 -21
- package/src/llama.cpp/ggml/include/ggml-backend.h +2 -0
- package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -0
- package/src/llama.cpp/ggml/include/ggml.h +36 -145
- package/src/llama.cpp/ggml/include/gguf.h +202 -0
- package/src/llama.cpp/ggml/src/CMakeLists.txt +6 -3
- package/src/llama.cpp/ggml/src/ggml-alloc.c +5 -0
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +0 -1
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +79 -49
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +5 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +33 -23
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +57 -72
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +87 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +335 -66
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +10 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1090 -378
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +2 -2
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +3 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +3 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +3 -1
- package/src/llama.cpp/ggml/src/ggml-impl.h +11 -16
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +16 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +6 -6
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +154 -35
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +9 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +18 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +40 -95
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +48 -48
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +24 -24
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +238 -164
- package/src/llama.cpp/ggml/src/ggml-sycl/gla.cpp +105 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/gla.hpp +8 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +3 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +7 -5
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +74 -4
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +314 -116
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -2
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +9 -3
- package/src/llama.cpp/ggml/src/ggml.c +117 -1327
- package/src/llama.cpp/ggml/src/gguf.cpp +1329 -0
- package/src/llama.cpp/include/llama-cpp.h +6 -1
- package/src/llama.cpp/include/llama.h +138 -75
- package/src/llama.cpp/src/CMakeLists.txt +13 -1
- package/src/llama.cpp/src/llama-adapter.cpp +347 -0
- package/src/llama.cpp/src/llama-adapter.h +74 -0
- package/src/llama.cpp/src/llama-arch.cpp +1487 -0
- package/src/llama.cpp/src/llama-arch.h +400 -0
- package/src/llama.cpp/src/llama-batch.cpp +368 -0
- package/src/llama.cpp/src/llama-batch.h +88 -0
- package/src/llama.cpp/src/llama-chat.cpp +578 -0
- package/src/llama.cpp/src/llama-chat.h +52 -0
- package/src/llama.cpp/src/llama-context.cpp +1775 -0
- package/src/llama.cpp/src/llama-context.h +128 -0
- package/src/llama.cpp/src/llama-cparams.cpp +1 -0
- package/src/llama.cpp/src/llama-cparams.h +37 -0
- package/src/llama.cpp/src/llama-grammar.cpp +5 -4
- package/src/llama.cpp/src/llama-grammar.h +3 -1
- package/src/llama.cpp/src/llama-hparams.cpp +71 -0
- package/src/llama.cpp/src/llama-hparams.h +139 -0
- package/src/llama.cpp/src/llama-impl.cpp +167 -0
- package/src/llama.cpp/src/llama-impl.h +16 -136
- package/src/llama.cpp/src/llama-kv-cache.cpp +718 -0
- package/src/llama.cpp/src/llama-kv-cache.h +218 -0
- package/src/llama.cpp/src/llama-mmap.cpp +589 -0
- package/src/llama.cpp/src/llama-mmap.h +67 -0
- package/src/llama.cpp/src/llama-model-loader.cpp +1124 -0
- package/src/llama.cpp/src/llama-model-loader.h +167 -0
- package/src/llama.cpp/src/llama-model.cpp +3953 -0
- package/src/llama.cpp/src/llama-model.h +370 -0
- package/src/llama.cpp/src/llama-quant.cpp +934 -0
- package/src/llama.cpp/src/llama-quant.h +1 -0
- package/src/llama.cpp/src/llama-sampling.cpp +147 -32
- package/src/llama.cpp/src/llama-sampling.h +3 -19
- package/src/llama.cpp/src/llama-vocab.cpp +1832 -575
- package/src/llama.cpp/src/llama-vocab.h +97 -142
- package/src/llama.cpp/src/llama.cpp +7160 -20314
- package/src/llama.cpp/src/unicode.cpp +8 -3
- package/src/llama.cpp/tests/CMakeLists.txt +2 -0
- package/src/llama.cpp/tests/test-autorelease.cpp +3 -3
- package/src/llama.cpp/tests/test-backend-ops.cpp +370 -59
- package/src/llama.cpp/tests/test-chat-template.cpp +162 -125
- package/src/llama.cpp/tests/test-gguf.cpp +222 -187
- package/src/llama.cpp/tests/test-model-load-cancel.cpp +1 -1
- package/src/llama.cpp/tests/test-sampling.cpp +0 -1
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +4 -4
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +9 -7
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +8 -6
package/README.md
CHANGED

@@ -4,9 +4,23 @@
 [](https://www.npmjs.com/package/@fugood/llama.node)
 
 
-Node binding of [llama.cpp](https://github.com/ggerganov/llama.cpp).
+An another Node binding of [llama.cpp](https://github.com/ggerganov/llama.cpp) to make same API with [llama.rn](https://github.com/mybigday/llama.rn) as much as possible.
 
-[llama.cpp](https://github.com/ggerganov/llama.cpp): Inference of [LLaMA](https://arxiv.org/abs/2302.13971) model in pure C/C++
+- [llama.cpp](https://github.com/ggerganov/llama.cpp): Inference of [LLaMA](https://arxiv.org/abs/2302.13971) model in pure C/C++
+- [llama.rn](https://github.com/mybigday/llama.rn): React Native binding of llama.cpp
+
+## Platform Support
+
+- macOS
+  - arm64: CPU and Metal GPU acceleration
+  - x86_64: CPU only
+- Windows (x86_64 and arm64)
+  - CPU
+  - GPU acceleration via Vulkan
+- Linux (x86_64 and arm64)
+  - CPU
+  - GPU acceleration via Vulkan
+  - GPU acceleration via CUDA
 
 ## Installation
 

@@ -49,6 +63,7 @@ console.log('Result:', text)
 
 - [x] `default`: General usage, not support GPU except macOS (Metal)
 - [x] `vulkan`: Support GPU Vulkan (Windows/Linux), but some scenario might unstable
+- [x] `cuda`: Support GPU CUDA (Linux), but only for limited capability (x86_64: 8.9, arm64: 8.7)
 
 ## License
 

package/bin/** (llama-node.node and node.lib for each platform/variant)
CHANGED
Binary files; the prebuilt binaries listed above were rebuilt, so no textual diff is shown.
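The variant list above maps onto the `lib_variant` option of the package's `loadModel()` API (see the `lib/index.ts` hunks later in this diff). Below is a minimal TypeScript sketch of selecting a variant; the GGUF path is hypothetical and the `completion` call mirrors the README's existing example rather than a verified signature:

```ts
import { loadModel } from '@fugood/llama.node'

const run = async () => {
  // 'lib_variant' accepts 'default', 'vulkan', or 'cuda' per the README list above;
  // the model path below is a hypothetical local GGUF file.
  const context = await loadModel({
    model: './models/llama-3.2-1b-instruct-q4_k_m.gguf',
    lib_variant: 'vulkan', // 'cuda' on supported Linux GPUs, 'default' for CPU/Metal
  })

  const { text } = await context.completion({
    prompt: 'Hello, my name is',
    n_predict: 32,
  })
  console.log('Result:', text)

  await context.release()
}

run()
```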
package/lib/binding.ts
CHANGED

@@ -111,6 +111,8 @@ export interface LlamaContext {
   saveSession(path: string): Promise<void>
   loadSession(path: string): Promise<void>
   release(): Promise<void>
+  // static
+  loadModelInfo(path: string, skip: string[]): Promise<Object>
 }
 
 export interface Module {
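The new static `loadModelInfo(path, skip)` reads only the GGUF header and key/value metadata and omits every key named in `skip`. A sketch of calling it directly through the native module with a custom skip list; it assumes the `loadModule`/`initLlama` export added in the `lib/index.js` and `lib/index.ts` hunks that follow, and the `'default'` variant:

```ts
import { initLlama } from '@fugood/llama.node'

// Sketch: call the static binding with a custom skip list instead of the
// default one baked into loadLlamaModelInfo. Keys listed in `skip` (and the
// header fields 'version', 'alignment', 'data_offset', if named) are omitted.
const readMetadata = async (ggufPath: string) => {
  const mod = await initLlama('default')
  return mod.LlamaContext.loadModelInfo(ggufPath, [
    'tokenizer.ggml.tokens',
    'tokenizer.ggml.merges',
  ])
}

readMetadata('./models/model.gguf').then((meta) => {
  for (const [key, value] of Object.entries(meta)) {
    console.log(`${key}: ${value}`)
  }
})
```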
package/lib/index.js
CHANGED

@@ -23,7 +23,7 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
 });
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.loadModel = void 0;
+exports.loadLlamaModelInfo = exports.initLlama = exports.loadModel = void 0;
 const binding_1 = require("./binding");
 __exportStar(require("./binding"), exports);
 const mods = {};

@@ -34,3 +34,18 @@ const loadModel = (options) => __awaiter(void 0, void 0, void 0, function* () {
     return new mods[variant].LlamaContext(options);
 });
 exports.loadModel = loadModel;
+exports.initLlama = binding_1.loadModule;
+const modelInfoSkip = [
+    // Large fields
+    'tokenizer.ggml.tokens',
+    'tokenizer.ggml.token_type',
+    'tokenizer.ggml.merges',
+    'tokenizer.ggml.scores',
+];
+const loadLlamaModelInfo = (path) => __awaiter(void 0, void 0, void 0, function* () {
+    var _a;
+    const variant = 'default';
+    (_a = mods[variant]) !== null && _a !== void 0 ? _a : (mods[variant] = yield (0, binding_1.loadModule)(variant));
+    return mods[variant].LlamaContext.loadModelInfo(path, modelInfoSkip);
+});
+exports.loadLlamaModelInfo = loadLlamaModelInfo;
package/lib/index.ts
CHANGED

@@ -14,3 +14,19 @@ export const loadModel = async (options: LlamaModelOptionsExtended): Promise<Lla
   mods[variant] ??= await loadModule(options.lib_variant)
   return new mods[variant].LlamaContext(options)
 }
+
+export const initLlama = loadModule
+
+const modelInfoSkip = [
+  // Large fields
+  'tokenizer.ggml.tokens',
+  'tokenizer.ggml.token_type',
+  'tokenizer.ggml.merges',
+  'tokenizer.ggml.scores',
+]
+
+export const loadLlamaModelInfo = async (path: string): Promise<Object> => {
+  const variant = 'default'
+  mods[variant] ??= await loadModule(variant)
+  return mods[variant].LlamaContext.loadModelInfo(path, modelInfoSkip)
+}
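`loadLlamaModelInfo` above is the convenience wrapper around the same static binding: it always uses the `default` variant and skips the large tokenizer arrays. A short usage sketch; the `general.architecture` key is only a typical example, since the returned keys depend on what the GGUF file declares:

```ts
import { loadLlamaModelInfo } from '@fugood/llama.node'

// Read GGUF metadata without constructing a full LlamaContext.
const inspectModel = async (ggufPath: string) => {
  const info: any = await loadLlamaModelInfo(ggufPath)
  console.log('GGUF version:', info.version)
  console.log('Architecture:', info['general.architecture']) // typical key in GGUF files
  return info
}

inspectModel('./models/model.gguf').catch(console.error)
```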
package/package.json
CHANGED
package/src/EmbeddingWorker.cpp
CHANGED

@@ -9,10 +9,11 @@ void EmbeddingWorker::Execute() {
   llama_kv_cache_clear(_sess->context());
   auto tokens = ::common_tokenize(_sess->context(), _text, true);
   // add SEP if not present
-
-
+  auto vocab = llama_model_get_vocab(_sess->model());
+  if (tokens.empty() || tokens.back() != llama_vocab_sep(vocab)) {
+    tokens.push_back(llama_vocab_sep(vocab));
   }
-  const int n_embd =
+  const int n_embd = llama_model_n_embd(_sess->model());
   do {
     auto ctx = _sess->context();
     int ret =

package/src/LlamaCompletionWorker.cpp
CHANGED

@@ -59,7 +59,9 @@ void LlamaCompletionWorker::Execute() {
   size_t n_cur = 0;
   size_t n_input = 0;
   const auto model = _sess->model();
-
+  auto vocab = llama_model_get_vocab(model);
+
+  const bool add_bos = llama_vocab_get_add_bos(vocab);
   auto ctx = _sess->context();
 
   auto sparams = llama_sampler_chain_default_params();

@@ -130,7 +132,7 @@ void LlamaCompletionWorker::Execute() {
     });
   }
   // is it an end of generation?
-  if (
+  if (llama_vocab_is_eog(vocab, new_token_id)) {
     break;
   }
   // check for stop words
package/src/LlamaContext.cpp
CHANGED

@@ -1,4 +1,6 @@
 #include "ggml.h"
+#include "gguf.h"
+#include "llama-impl.h"
 #include "LlamaContext.h"
 #include "DetokenizeWorker.h"
 #include "DisposeWorker.h"

@@ -8,6 +10,56 @@
 #include "SaveSessionWorker.h"
 #include "TokenizeWorker.h"
 
+// loadModelInfo(path: string): object
+Napi::Value LlamaContext::ModelInfo(const Napi::CallbackInfo& info) {
+  Napi::Env env = info.Env();
+  struct gguf_init_params params = {
+    /*.no_alloc = */ false,
+    /*.ctx = */ NULL,
+  };
+  std::string path = info[0].ToString().Utf8Value();
+
+  // Convert Napi::Array to vector<string>
+  std::vector<std::string> skip;
+  if (info.Length() > 1 && info[1].IsArray()) {
+    Napi::Array skipArray = info[1].As<Napi::Array>();
+    for (uint32_t i = 0; i < skipArray.Length(); i++) {
+      skip.push_back(skipArray.Get(i).ToString().Utf8Value());
+    }
+  }
+
+  struct gguf_context * ctx = gguf_init_from_file(path.c_str(), params);
+
+  Napi::Object metadata = Napi::Object::New(env);
+  if (std::find(skip.begin(), skip.end(), "version") == skip.end()) {
+    metadata.Set("version", Napi::Number::New(env, gguf_get_version(ctx)));
+  }
+  if (std::find(skip.begin(), skip.end(), "alignment") == skip.end()) {
+    metadata.Set("alignment", Napi::Number::New(env, gguf_get_alignment(ctx)));
+  }
+  if (std::find(skip.begin(), skip.end(), "data_offset") == skip.end()) {
+    metadata.Set("data_offset", Napi::Number::New(env, gguf_get_data_offset(ctx)));
+  }
+
+  // kv
+  {
+    const int n_kv = gguf_get_n_kv(ctx);
+
+    for (int i = 0; i < n_kv; ++i) {
+      const char * key = gguf_get_key(ctx, i);
+      if (std::find(skip.begin(), skip.end(), key) != skip.end()) {
+        continue;
+      }
+      const std::string value = gguf_kv_to_str(ctx, i);
+      metadata.Set(key, Napi::String::New(env, value.c_str()));
+    }
+  }
+
+  gguf_free(ctx);
+
+  return metadata;
+}
+
 std::vector<common_chat_msg> get_messages(Napi::Array messages) {
   std::vector<common_chat_msg> chat;
   for (size_t i = 0; i < messages.Length(); i++) {

@@ -52,7 +104,10 @@ void LlamaContext::Init(Napi::Env env, Napi::Object &exports) {
            "loadSession",
            static_cast<napi_property_attributes>(napi_enumerable)),
        InstanceMethod<&LlamaContext::Release>(
-           "release", static_cast<napi_property_attributes>(napi_enumerable))
+           "release", static_cast<napi_property_attributes>(napi_enumerable)),
+       StaticMethod<&LlamaContext::ModelInfo>(
+           "loadModelInfo",
+           static_cast<napi_property_attributes>(napi_enumerable))});
   Napi::FunctionReference *constructor = new Napi::FunctionReference();
   *constructor = Napi::Persistent(func);
 #if NAPI_VERSION > 5

@@ -140,14 +195,14 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   llama_backend_init();
   llama_numa_init(params.numa);
 
-  auto
+  auto sess = std::make_shared<LlamaSession>(params);
 
-  if (
+  if (sess->model() == nullptr || sess->context() == nullptr) {
     Napi::TypeError::New(env, "Failed to load model")
         .ThrowAsJavaScriptException();
   }
 
-  _sess =
+  _sess = sess;
   _info = common_params_get_system_info(params);
 }
 

@@ -162,8 +217,8 @@ bool validateModelChatTemplate(const struct llama_model * model) {
   int32_t res = llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
   if (res >= 0) {
     llama_chat_message chat[] = {{"user", "test"}};
-
-    int32_t chat_res = llama_chat_apply_template(
+    const char * tmpl = llama_model_chat_template(model);
+    int32_t chat_res = llama_chat_apply_template(tmpl, chat, 1, true, nullptr, 0);
     return chat_res > 0;
   }
   return res > 0;
package/src/LlamaContext.h
CHANGED

@@ -5,6 +5,7 @@ class LlamaCompletionWorker;
 class LlamaContext : public Napi::ObjectWrap<LlamaContext> {
 public:
   LlamaContext(const Napi::CallbackInfo &info);
+  static Napi::Value ModelInfo(const Napi::CallbackInfo& info);
   static void Init(Napi::Env env, Napi::Object &exports);
 
 private:
package/src/common.hpp
CHANGED

@@ -11,8 +11,6 @@
 #include <tuple>
 #include <vector>
 
-typedef std::unique_ptr<llama_model, decltype(&llama_free_model)> LlamaCppModel;
-typedef std::unique_ptr<llama_context, decltype(&llama_free)> LlamaCppContext;
 typedef std::unique_ptr<common_sampler, decltype(&common_sampler_free)>
     LlamaCppSampling;
 typedef std::unique_ptr<llama_batch, decltype(&llama_batch_free)> LlamaCppBatch;

@@ -47,17 +45,17 @@ constexpr T get_option(const Napi::Object &options, const std::string &name,
 
 class LlamaSession {
 public:
-  LlamaSession(
-      :
-
+  LlamaSession(common_params params)
+      : params_(params) {
+    llama_init_ = common_init_from_params(params);
     tokens_.reserve(params.n_ctx);
   }
 
   ~LlamaSession() { dispose(); }
 
-  inline llama_context *context() { return
+  inline llama_context *context() { return llama_init_.context.get(); }
 
-  inline llama_model *model() { return
+  inline llama_model *model() { return llama_init_.model.get(); }
 
   inline std::vector<llama_token> *tokens_ptr() { return &tokens_; }
 

@@ -72,13 +70,10 @@ public:
   void dispose() {
     std::lock_guard<std::mutex> lock(mutex);
     tokens_.clear();
-    ctx_.reset();
-    model_.reset();
   }
 
 private:
-
-  LlamaCppContext ctx_;
+  common_init_result llama_init_;
   const common_params params_;
   std::vector<llama_token> tokens_{};
   std::mutex mutex;

package/src/llama.cpp/.github/workflows/build.yml
CHANGED

@@ -60,8 +60,7 @@ jobs:
             -DLLAMA_CURL=ON \
             -DGGML_METAL_USE_BF16=ON \
             -DGGML_METAL_EMBED_LIBRARY=ON \
-            -DGGML_RPC=ON
-            -DBUILD_SHARED_LIBS=OFF
+            -DGGML_RPC=ON
         cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
 
     - name: Test

@@ -88,6 +87,7 @@ jobs:
      if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
      run: |
        cp LICENSE ./build/bin/
+       cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp
        zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip ./build/bin/*
 
    - name: Upload artifacts

@@ -123,8 +123,7 @@ jobs:
             -DLLAMA_FATAL_WARNINGS=ON \
             -DLLAMA_CURL=ON \
             -DGGML_METAL=OFF \
-            -DGGML_RPC=ON
-            -DBUILD_SHARED_LIBS=OFF
+            -DGGML_RPC=ON
        cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
 
    - name: Test

@@ -151,6 +150,7 @@ jobs:
      if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
      run: |
        cp LICENSE ./build/bin/
+       cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp
        zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip ./build/bin/*
 
    - name: Upload artifacts

@@ -181,7 +181,7 @@ jobs:
      run: |
        mkdir build
        cd build
-       cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON -DGGML_RPC=ON
+       cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON -DGGML_RPC=ON
        cmake --build . --config Release -j $(nproc)
 
    - name: Test

@@ -219,6 +219,7 @@ jobs:
      if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
      run: |
        cp LICENSE ./build/bin/
+       cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp
        zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip ./build/bin/*
 
    - name: Upload artifacts

@@ -236,7 +237,7 @@ jobs:
    strategy:
      matrix:
        sanitizer: [ADDRESS, THREAD, UNDEFINED]
-        build_type: [Debug
+        build_type: [Debug]
 
    steps:
      - name: Clone

@@ -651,23 +652,23 @@ jobs:
      matrix:
        include:
          - build: 'noavx-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF'
          - build: 'avx2-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON'
          - build: 'avx-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX2=OFF
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX2=OFF'
          - build: 'avx512-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX512=ON
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX512=ON'
          - build: 'openblas-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BLAS=ON -
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
          - build: 'kompute-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON'
          - build: 'vulkan-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_VULKAN=ON
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_VULKAN=ON'
          - build: 'llvm-arm64'
-            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON
+            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
          - build: 'msvc-arm64'
-            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-msvc.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON
+            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-msvc.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
          - build: 'llvm-arm64-opencl-adreno'
            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON'
 

@@ -798,6 +799,7 @@ jobs:
      if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
      run: |
        Copy-Item LICENSE .\build\bin\Release\llama.cpp.txt
+       Copy-Item .\examples\run\linenoise.cpp\LICENSE .\build\bin\Release\linenoise.cpp.txt
        7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip .\build\bin\Release\*
 
    - name: Upload artifacts

@@ -914,7 +916,7 @@ jobs:
      shell: cmd
      run: |
        call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
-       cmake -S . -B build -G "Ninja Multi-Config" -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -
+       cmake -S . -B build -G "Ninja Multi-Config" -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DGGML_RPC=ON
        set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
        cmake --build build --config Release -j %NINJA_JOBS% -t ggml
        cmake --build build --config Release

@@ -1239,7 +1241,7 @@ jobs:
 
    - name: Create release
      id: create_release
-      uses:
+      uses: ggml-org/action-create-release@v1
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      with:

package/src/llama.cpp/.github/workflows/docker.yml
CHANGED

@@ -34,21 +34,14 @@ jobs:
    strategy:
      matrix:
        config:
-
-          - { tag: "
-          - { tag: "
-          - { tag: "
-          - { tag: "
-          - { tag: "
-          - { tag: "light-musa", dockerfile: ".devops/llama-cli-musa.Dockerfile", platforms: "linux/amd64" }
-          - { tag: "server-musa", dockerfile: ".devops/llama-server-musa.Dockerfile", platforms: "linux/amd64" }
-          - { tag: "full-musa", dockerfile: ".devops/full-musa.Dockerfile", platforms: "linux/amd64" }
+          # Multi-stage build
+          - { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: false}
+          - { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
+          - { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
+          - { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
+          - { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
          # Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete
-          #- {
-          #- { tag: "server-rocm", dockerfile: ".devops/llama-server-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
-          #- { tag: "full-rocm", dockerfile: ".devops/full-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
-          - { tag: "light-intel", dockerfile: ".devops/llama-cli-intel.Dockerfile", platforms: "linux/amd64" }
-          - { tag: "server-intel", dockerfile: ".devops/llama-server-intel.Dockerfile", platforms: "linux/amd64" }
+          #- {tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: true }
    steps:
      - name: Check out the repo
        uses: actions/checkout@v4

@@ -56,10 +49,10 @@ jobs:
          fetch-depth: 0 # preserve git history, so we can determine the build number
 
      - name: Set up QEMU
-        uses: docker/setup-qemu-action@
+        uses: docker/setup-qemu-action@v3
 
      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@
+        uses: docker/setup-buildx-action@v3
 
      - name: Log in to Docker Hub
        uses: docker/login-action@v2

@@ -79,26 +72,34 @@ jobs:
 
          # determine tag name postfix (build number, commit hash)
          if [[ "${{ env.GITHUB_BRANCH_NAME }}" == "master" ]]; then
-            TAG_POSTFIX="b${BUILD_NUMBER}"
+            TAG_POSTFIX="-b${BUILD_NUMBER}"
          else
            SAFE_NAME=$(echo "${{ env.GITHUB_BRANCH_NAME }}" | tr '/' '-')
-            TAG_POSTFIX="
+            TAG_POSTFIX="-${SAFE_NAME}-${SHORT_HASH}"
          fi
-
          # list all tags possible
-
-
-
-
-
-
+          if [[ "${{ matrix.config.tag }}" == "cpu" ]]; then
+            TYPE=""
+          else
+            TYPE="-${{ matrix.config.tag }}"
+          fi
+          PREFIX="ghcr.io/${REPO_OWNER}/${REPO_NAME}:"
+          FULLTAGS="${PREFIX}full${TYPE},${PREFIX}full${TYPE}${TAG_POSTFIX}"
+          LIGHTTAGS="${PREFIX}light${TYPE},${PREFIX}light${TYPE}${TAG_POSTFIX}"
+          SERVERTAGS="${PREFIX}server${TYPE},${PREFIX}server${TYPE}${TAG_POSTFIX}"
+          echo "full_output_tags=$FULLTAGS" >> $GITHUB_OUTPUT
+          echo "light_output_tags=$LIGHTTAGS" >> $GITHUB_OUTPUT
+          echo "server_output_tags=$SERVERTAGS" >> $GITHUB_OUTPUT
+          echo "full_output_tags=$FULLTAGS" # print out for debugging
+          echo "light_output_tags=$LIGHTTAGS" # print out for debugging
+          echo "server_output_tags=$SERVERTAGS" # print out for debugging
        env:
          GITHUB_BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
          GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'
 
-      # https://github.com/jlumbroso/free-disk-space/tree/54081f138730dfa15788a46383842cd2f914a1be#example
      - name: Free Disk Space (Ubuntu)
-
+        if: ${{ matrix.config.free_disk_space == true }}
+        uses: ggml-org/free-disk-space@v1.3.1
        with:
          # this might remove tools that are actually needed,
          # if set to "true" but frees about 6 GB

@@ -113,13 +114,59 @@ jobs:
          docker-images: true
          swap-storage: true
 
-      - name: Build and push Docker image (tagged + versioned)
-        if: ${{ github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
+      - name: Build and push Full Docker image (tagged + versioned)
+        if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.full == true }}
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          push: true
+          platforms: ${{ matrix.config.platforms }}
+          # tag list is generated from step above
+          tags: ${{ steps.tag.outputs.full_output_tags }}
+          file: ${{ matrix.config.dockerfile }}
+          target: full
+          provenance: false
+          # using github experimental cache
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+          # return to this if the experimental github cache is having issues
+          #cache-to: type=local,dest=/tmp/.buildx-cache
+          #cache-from: type=local,src=/tmp/.buildx-cache
+
+      - name: Build and push Light Docker image (tagged + versioned)
+        if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.light == true }}
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          push: true
+          platforms: ${{ matrix.config.platforms }}
+          # tag list is generated from step above
+          tags: ${{ steps.tag.outputs.light_output_tags }}
+          file: ${{ matrix.config.dockerfile }}
+          target: light
+          provenance: false
+          # using github experimental cache
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+          # return to this if the experimental github cache is having issues
+          #cache-to: type=local,dest=/tmp/.buildx-cache
+          #cache-from: type=local,src=/tmp/.buildx-cache
+
+      - name: Build and push Server Docker image (tagged + versioned)
+        if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.server == true }}
        uses: docker/build-push-action@v6
        with:
          context: .
          push: true
          platforms: ${{ matrix.config.platforms }}
          # tag list is generated from step above
-          tags: ${{ steps.tag.outputs.
+          tags: ${{ steps.tag.outputs.server_output_tags }}
          file: ${{ matrix.config.dockerfile }}
+          target: server
+          provenance: false
+          # using github experimental cache
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+          # return to this if the experimental github cache is having issues
+          #cache-to: type=local,dest=/tmp/.buildx-cache
+          #cache-from: type=local,src=/tmp/.buildx-cache