@fugood/llama.node 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +1 -10
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/package.json +6 -4
- package/src/LlamaCompletionWorker.cpp +6 -6
- package/src/LlamaContext.cpp +7 -9
- package/src/common.hpp +2 -1
- package/src/llama.cpp/.github/workflows/build.yml +98 -24
- package/src/llama.cpp/.github/workflows/close-issue.yml +5 -0
- package/src/llama.cpp/.github/workflows/docker.yml +43 -34
- package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +7 -0
- package/src/llama.cpp/.github/workflows/nix-ci.yml +7 -0
- package/src/llama.cpp/.github/workflows/python-check-requirements.yml +2 -4
- package/src/llama.cpp/.github/workflows/python-type-check.yml +3 -1
- package/src/llama.cpp/.github/workflows/server.yml +7 -0
- package/src/llama.cpp/CMakeLists.txt +20 -8
- package/src/llama.cpp/common/CMakeLists.txt +12 -10
- package/src/llama.cpp/common/arg.cpp +2006 -0
- package/src/llama.cpp/common/arg.h +77 -0
- package/src/llama.cpp/common/common.cpp +496 -1632
- package/src/llama.cpp/common/common.h +161 -63
- package/src/llama.cpp/common/console.cpp +3 -0
- package/src/llama.cpp/common/log.cpp +401 -0
- package/src/llama.cpp/common/log.h +66 -698
- package/src/llama.cpp/common/ngram-cache.cpp +3 -0
- package/src/llama.cpp/common/sampling.cpp +348 -350
- package/src/llama.cpp/common/sampling.h +62 -139
- package/src/llama.cpp/common/stb_image.h +5990 -6398
- package/src/llama.cpp/common/train.cpp +2 -0
- package/src/llama.cpp/docs/build.md +36 -1
- package/src/llama.cpp/examples/CMakeLists.txt +0 -1
- package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +1 -2
- package/src/llama.cpp/examples/batched/batched.cpp +39 -55
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +34 -44
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +55 -52
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +15 -15
- package/src/llama.cpp/examples/cvector-generator/pca.hpp +3 -13
- package/src/llama.cpp/examples/embedding/embedding.cpp +143 -87
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +33 -33
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +36 -35
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +14 -39
- package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +83 -0
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +58 -39
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +34 -27
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +59 -62
- package/src/llama.cpp/examples/infill/infill.cpp +117 -132
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +265 -58
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +29 -22
- package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
- package/src/llama.cpp/examples/llava/clip.cpp +685 -150
- package/src/llama.cpp/examples/llava/clip.h +11 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +47 -58
- package/src/llama.cpp/examples/llava/llava.cpp +110 -24
- package/src/llama.cpp/examples/llava/llava.h +2 -3
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +323 -0
- package/src/llama.cpp/examples/llava/requirements.txt +1 -0
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +42 -43
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +10 -8
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +23 -22
- package/src/llama.cpp/examples/lookup/lookup.cpp +40 -43
- package/src/llama.cpp/examples/main/main.cpp +210 -262
- package/src/llama.cpp/examples/parallel/parallel.cpp +49 -49
- package/src/llama.cpp/examples/passkey/passkey.cpp +42 -50
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +187 -200
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/quantize.cpp +27 -9
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +2 -3
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +49 -44
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +24 -1
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +32 -35
- package/src/llama.cpp/examples/server/CMakeLists.txt +3 -5
- package/src/llama.cpp/examples/server/server.cpp +1027 -1073
- package/src/llama.cpp/examples/server/tests/requirements.txt +2 -1
- package/src/llama.cpp/examples/server/utils.hpp +107 -105
- package/src/llama.cpp/examples/simple/simple.cpp +35 -41
- package/src/llama.cpp/examples/speculative/speculative.cpp +129 -103
- package/src/llama.cpp/examples/sycl/run-llama2.sh +10 -19
- package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +25 -27
- package/src/llama.cpp/ggml/CMakeLists.txt +14 -3
- package/src/llama.cpp/ggml/include/ggml-alloc.h +3 -3
- package/src/llama.cpp/ggml/include/ggml-backend.h +145 -60
- package/src/llama.cpp/ggml/include/ggml-blas.h +3 -3
- package/src/llama.cpp/ggml/include/ggml-cann.h +15 -19
- package/src/llama.cpp/ggml/include/ggml-cuda.h +16 -16
- package/src/llama.cpp/ggml/include/ggml-metal.h +5 -8
- package/src/llama.cpp/ggml/include/ggml-rpc.h +5 -5
- package/src/llama.cpp/ggml/include/ggml-sycl.h +8 -8
- package/src/llama.cpp/ggml/include/ggml-vulkan.h +7 -7
- package/src/llama.cpp/ggml/include/ggml.h +293 -186
- package/src/llama.cpp/ggml/src/CMakeLists.txt +86 -44
- package/src/llama.cpp/ggml/src/ggml-aarch64.c +2135 -1119
- package/src/llama.cpp/ggml/src/ggml-alloc.c +6 -0
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +152 -70
- package/src/llama.cpp/ggml/src/{ggml-backend.c → ggml-backend.cpp} +606 -286
- package/src/llama.cpp/ggml/src/ggml-blas.cpp +9 -10
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +4 -27
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +32 -4
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +179 -41
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -1
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +2 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +278 -0
- package/src/llama.cpp/ggml/src/ggml-cann.cpp +215 -216
- package/src/llama.cpp/ggml/src/ggml-common.h +20 -0
- package/src/llama.cpp/ggml/src/ggml-cpu-impl.h +614 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +178 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +134 -0
- package/src/llama.cpp/ggml/src/ggml-impl.h +49 -603
- package/src/llama.cpp/ggml/src/ggml-kompute.cpp +4 -24
- package/src/llama.cpp/ggml/src/ggml-quants.c +972 -92
- package/src/llama.cpp/ggml/src/ggml-quants.h +15 -0
- package/src/llama.cpp/ggml/src/ggml-rpc.cpp +116 -66
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +3 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +52 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +99 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +21 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +57 -57
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +106 -106
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +16 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +101 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +125 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +6 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +2 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +71 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +21 -0
- package/src/llama.cpp/ggml/src/ggml-sycl.cpp +97 -169
- package/src/llama.cpp/ggml/src/ggml-vulkan.cpp +1508 -1124
- package/src/llama.cpp/ggml/src/ggml.c +3001 -1647
- package/src/llama.cpp/ggml/src/llamafile/sgemm.cpp +192 -0
- package/src/llama.cpp/ggml/src/vulkan-shaders/CMakeLists.txt +2 -0
- package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +88 -40
- package/src/llama.cpp/include/llama.h +241 -264
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +46 -0
- package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +1 -1
- package/src/llama.cpp/src/llama-grammar.cpp +721 -122
- package/src/llama.cpp/src/llama-grammar.h +120 -15
- package/src/llama.cpp/src/llama-impl.h +156 -1
- package/src/llama.cpp/src/llama-sampling.cpp +1375 -303
- package/src/llama.cpp/src/llama-sampling.h +20 -47
- package/src/llama.cpp/src/llama-vocab.cpp +343 -120
- package/src/llama.cpp/src/llama-vocab.h +33 -17
- package/src/llama.cpp/src/llama.cpp +4247 -1525
- package/src/llama.cpp/src/unicode-data.cpp +6 -4
- package/src/llama.cpp/src/unicode-data.h +4 -4
- package/src/llama.cpp/src/unicode.cpp +15 -7
- package/src/llama.cpp/tests/CMakeLists.txt +3 -0
- package/src/llama.cpp/tests/test-arg-parser.cpp +131 -0
- package/src/llama.cpp/tests/test-backend-ops.cpp +1592 -289
- package/src/llama.cpp/tests/test-barrier.cpp +93 -0
- package/src/llama.cpp/tests/test-grad0.cpp +187 -70
- package/src/llama.cpp/tests/test-grammar-integration.cpp +23 -38
- package/src/llama.cpp/tests/test-grammar-parser.cpp +6 -4
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +6 -4
- package/src/llama.cpp/tests/test-llama-grammar.cpp +9 -8
- package/src/llama.cpp/tests/test-log.cpp +39 -0
- package/src/llama.cpp/tests/test-quantize-fns.cpp +6 -0
- package/src/llama.cpp/tests/test-rope.cpp +1 -1
- package/src/llama.cpp/tests/test-sampling.cpp +157 -98
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +55 -35
- package/patches/llama.patch +0 -22
- package/src/llama.cpp/.github/workflows/bench.yml +0 -310
- package/src/llama.cpp/common/grammar-parser.cpp +0 -536
- package/src/llama.cpp/common/grammar-parser.h +0 -29
- package/src/llama.cpp/examples/benchmark/CMakeLists.txt +0 -6
- package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -275
package/CMakeLists.txt
CHANGED

@@ -62,16 +62,7 @@ if (VULKAN_SDK)
   find_package(Vulkan REQUIRED)
 endif()
 
-
-
-add_custom_target(
-  patch ALL
-  COMMAND ${PATCH} -p1 -N < ${CMAKE_SOURCE_DIR}/patches/llama.patch || true
-  WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/src/llama.cpp
-  COMMENT "Applying patches"
-)
-
-set(LLAMA_STATIC ON CACHE BOOL "Build llama as static library")
+set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries")
 add_subdirectory("src/llama.cpp")
 
 include_directories(
package/bin/darwin/arm64/llama-node.node
CHANGED
Binary file

package/bin/darwin/x64/llama-node.node
CHANGED
Binary file

package/bin/linux/arm64/llama-node.node
CHANGED
Binary file

package/bin/linux/x64/llama-node.node
CHANGED
Binary file

package/bin/linux-vulkan/arm64/llama-node.node
CHANGED
Binary file

package/bin/linux-vulkan/x64/llama-node.node
CHANGED
Binary file

package/bin/win32/arm64/llama-node.node
CHANGED
Binary file

package/bin/win32/arm64/node.lib
CHANGED
Binary file

package/bin/win32/x64/llama-node.node
CHANGED
Binary file

package/bin/win32/x64/node.lib
CHANGED
Binary file

package/bin/win32-vulkan/arm64/llama-node.node
CHANGED
Binary file

package/bin/win32-vulkan/arm64/node.lib
CHANGED
Binary file

package/bin/win32-vulkan/x64/llama-node.node
CHANGED
Binary file

package/bin/win32-vulkan/x64/node.lib
CHANGED
Binary file
package/package.json
CHANGED

@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "0.3.0",
+  "version": "0.3.2",
   "description": "Llama.cpp for Node.js",
   "main": "lib/index.js",
   "scripts": {
@@ -11,7 +11,8 @@
     "build-native": "cmake-js compile",
     "clean": "rimraf build",
     "prepare": "husky",
-    "commitlint": "commitlint --edit"
+    "commitlint": "commitlint --edit",
+    "release": "release-it"
   },
   "repository": {
     "type": "git",
@@ -51,11 +52,12 @@
     "@commitlint/cli": "^19.3.0",
     "@commitlint/config-conventional": "^19.2.2",
     "@types/jest": "^29.5.12",
-    "@types/node": "^
+    "@types/node": "^22.0.0",
     "cmake-js": "^7.3.0",
     "husky": "^9.0.11",
     "jest": "^29.7.0",
-    "
+    "release-it": "^17.7.0",
+    "rimraf": "^6.0.1",
     "typescript": "^5.4.5",
     "wait-for-expect": "^3.0.2"
   },
package/src/LlamaCompletionWorker.cpp
CHANGED

@@ -59,13 +59,13 @@ void LlamaCompletionWorker::Execute() {
   size_t n_cur = 0;
   size_t n_input = 0;
   const auto model = _sess->model();
-  const bool add_bos =
+  const bool add_bos = llama_add_bos_token(model);
   auto ctx = _sess->context();
 
-
+  auto sparams = llama_sampler_chain_default_params();
 
-  LlamaCppSampling sampling{
-
+  LlamaCppSampling sampling{gpt_sampler_init(model, _params.sparams),
+                            gpt_sampler_free};
 
   std::vector<llama_token> prompt_tokens =
       ::llama_tokenize(ctx, _params.prompt, add_bos);
@@ -109,8 +109,8 @@ void LlamaCompletionWorker::Execute() {
     }
     // sample the next token
     const llama_token new_token_id =
-
-
+        gpt_sampler_sample(sampling.get(), ctx, -1);
+    gpt_sampler_accept(sampling.get(), new_token_id, true);
     // prepare the next batch
     embd->emplace_back(new_token_id);
     auto token = llama_token_to_piece(ctx, new_token_id);
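
The two hunks above track the vendored llama.cpp's sampling refactor, which replaced the old llama_sampling_* context with the gpt_sampler API from common/sampling.h. The following sketch shows the lifecycle the worker now follows; it is a minimal illustration, assuming model, ctx, params, and n_predict mirror LlamaCompletionWorker's members, with batching and decoding elided:

// Sketch only: gpt_sampler lifecycle after the refactor.
#include "common/sampling.h"
#include <memory>

typedef std::unique_ptr<gpt_sampler, decltype(&gpt_sampler_free)> LlamaCppSampling;

void generate_sketch(llama_model *model, llama_context *ctx,
                     gpt_params &params, int n_predict) {
  // RAII: gpt_sampler_free runs automatically when `sampling` leaves scope.
  LlamaCppSampling sampling{gpt_sampler_init(model, params.sparams),
                            gpt_sampler_free};
  for (int i = 0; i < n_predict; i++) {
    // Sample from the most recent logits (index -1) ...
    const llama_token id = gpt_sampler_sample(sampling.get(), ctx, -1);
    // ... then feed the token back so penalties and grammar state advance.
    gpt_sampler_accept(sampling.get(), id, true);
    // (elided: the real loop appends `id` to the batch and calls
    // llama_decode before sampling the next token)
  }
}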

package/src/LlamaContext.cpp
CHANGED

@@ -75,7 +75,7 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   params.embedding = get_option<bool>(options, "embedding", false);
   params.n_ctx = get_option<int32_t>(options, "n_ctx", 512);
   params.n_batch = get_option<int32_t>(options, "n_batch", 2048);
-  params.n_threads =
+  params.cpuparams.n_threads =
       get_option<int32_t>(options, "n_threads", cpu_get_num_math() / 2);
   params.n_gpu_layers = get_option<int32_t>(options, "n_gpu_layers", -1);
   params.use_mlock = get_option<bool>(options, "use_mlock", false);
@@ -86,16 +86,14 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   llama_backend_init();
   llama_numa_init(params.numa);
 
-
-  llama_context *ctx;
-  std::tie(model, ctx) = llama_init_from_gpt_params(params);
+  auto result = llama_init_from_gpt_params(params);
 
-  if (model == nullptr ||
+  if (result.model == nullptr || result.context == nullptr) {
     Napi::TypeError::New(env, "Failed to load model")
         .ThrowAsJavaScriptException();
   }
 
-  _sess = std::make_shared<LlamaSession>(model,
+  _sess = std::make_shared<LlamaSession>(result.model, result.context, params);
   _info = gpt_params_get_system_info(params);
 }
 
@@ -167,11 +165,11 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   params.sparams.penalty_present =
       get_option<float>(options, "penalty_present", 0.00f);
   params.sparams.penalize_nl = get_option<bool>(options, "penalize_nl", false);
-  params.sparams.
-  params.ignore_eos = get_option<float>(options, "ignore_eos", false);
+  params.sparams.typ_p = get_option<float>(options, "typical_p", 1.00f);
+  params.sparams.ignore_eos = get_option<float>(options, "ignore_eos", false);
   params.sparams.grammar = get_option<std::string>(options, "grammar", "");
   params.n_keep = get_option<int32_t>(options, "n_keep", 0);
-  params.seed = get_option<int32_t>(options, "seed", LLAMA_DEFAULT_SEED);
+  params.sparams.seed = get_option<int32_t>(options, "seed", LLAMA_DEFAULT_SEED);
   std::vector<std::string> stop_words;
   if (options.Has("stop") && options.Get("stop").IsArray()) {
     auto stop_words_array = options.Get("stop").As<Napi::Array>();
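
The constructor hunk reflects another upstream change: llama_init_from_gpt_params no longer fills a (model, context) pair through std::tie but returns a result struct whose .model and .context fields are checked and handed to the session. A hedged sketch of the new call shape; only the field accesses come from the diff, the surrounding function is illustrative:

// Sketch only: model + context initialization in one call.
#include "common/common.h"

bool init_backend_sketch(gpt_params &params) {
  auto result = llama_init_from_gpt_params(params); // loads the model, creates a context
  if (result.model == nullptr || result.context == nullptr) {
    return false; // the real constructor throws a Napi::TypeError here
  }
  // ownership of result.model / result.context passes to the session:
  // _sess = std::make_shared<LlamaSession>(result.model, result.context, params);
  return true;
}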

package/src/common.hpp
CHANGED

@@ -1,6 +1,7 @@
 #pragma once
 
 #include "common/common.h"
+#include "common/sampling.h"
 #include "llama.h"
 #include <memory>
 #include <mutex>
@@ -12,7 +13,7 @@
 
 typedef std::unique_ptr<llama_model, decltype(&llama_free_model)> LlamaCppModel;
 typedef std::unique_ptr<llama_context, decltype(&llama_free)> LlamaCppContext;
-typedef std::unique_ptr<
+typedef std::unique_ptr<gpt_sampler, decltype(&gpt_sampler_free)>
     LlamaCppSampling;
 typedef std::unique_ptr<llama_batch, decltype(&llama_batch_free)> LlamaCppBatch;
 
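
The header keeps the project's pattern of owning llama.cpp's C-style handles through std::unique_ptr with decltype(&free_fn) as the deleter type; only the element type changes to the new gpt_sampler. A self-contained illustration of the pattern with a hypothetical C API (widget_create/widget_free are stand-ins, not real llama.cpp functions):

// Generic form of the typedefs in common.hpp: a unique_ptr whose deleter
// is a plain C free function, so cleanup is automatic and exception-safe.
#include <memory>

struct widget;               // opaque handle (hypothetical)
widget *widget_create();     // hypothetical C API
void widget_free(widget *);  // matching free function

typedef std::unique_ptr<widget, decltype(&widget_free)> WidgetPtr;

void use_widget() {
  // the deleter must be supplied at construction time
  WidgetPtr w{widget_create(), widget_free};
  // ... use w.get() with the C API; widget_free runs when w leaves scope
}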

package/src/llama.cpp/.github/workflows/build.yml
CHANGED

@@ -19,10 +19,18 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
   cancel-in-progress: true
 
+# Fine-grant permission
+# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
+permissions:
+  contents: write # for creating release
+
 env:
   BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
   GGML_NLOOP: 3
   GGML_N_THREADS: 1
+  LLAMA_LOG_COLORS: 1
+  LLAMA_LOG_PREFIX: 1
+  LLAMA_LOG_TIMESTAMPS: 1
 
 jobs:
   macOS-latest-cmake-arm64:
@@ -47,7 +55,7 @@ jobs:
           sysctl -a
           mkdir build
           cd build
-          cmake -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL_EMBED_LIBRARY=ON -DLLAMA_CURL=ON -DBUILD_SHARED_LIBS=OFF ..
+          cmake -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL_EMBED_LIBRARY=ON -DLLAMA_CURL=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF ..
           cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
 
       - name: Test
@@ -105,7 +113,7 @@ jobs:
           sysctl -a
           # Metal is disabled due to intermittent failures with Github runners not having a GPU:
           # https://github.com/ggerganov/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
-          cmake -B build -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL=OFF -DLLAMA_CURL=ON -DBUILD_SHARED_LIBS=OFF
+          cmake -B build -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL=OFF -DLLAMA_CURL=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF
           cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
 
       - name: Test
@@ -222,7 +230,7 @@ jobs:
         run: |
           mkdir build
           cd build
-          cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON -DBUILD_SHARED_LIBS=OFF
+          cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF
           cmake --build . --config Release -j $(nproc)
 
       - name: Test
@@ -375,7 +383,7 @@ jobs:
     steps:
       - name: Clone
        id: checkout
-        uses: actions/checkout@
+        uses: actions/checkout@v4
 
       - name: Dependencies
        id: depends
@@ -401,7 +409,7 @@ jobs:
     continue-on-error: true
 
     steps:
-      - uses: actions/checkout@
+      - uses: actions/checkout@v4
 
       - name: add oneAPI to apt
        shell: bash
@@ -442,7 +450,7 @@ jobs:
     continue-on-error: true
 
     steps:
-      - uses: actions/checkout@
+      - uses: actions/checkout@v4
 
       - name: add oneAPI to apt
        shell: bash
@@ -546,7 +554,7 @@ jobs:
     steps:
       - name: Clone
        id: checkout
-        uses: actions/checkout@
+        uses: actions/checkout@v4
 
       - name: Dependencies
        id: depends
@@ -576,7 +584,7 @@ jobs:
     steps:
       - name: Clone
        id: checkout
-        uses: actions/checkout@
+        uses: actions/checkout@v4
 
       - name: Dependencies
        id: depends
@@ -610,7 +618,7 @@ jobs:
     steps:
      - name: Clone
        id: checkout
-        uses: actions/checkout@
+        uses: actions/checkout@v4
 
       - name: Dependencies
        id: depends
@@ -696,22 +704,20 @@ jobs:
     strategy:
       matrix:
         include:
-          - build: 'rpc-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=ON'
           - build: 'noavx-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DBUILD_SHARED_LIBS=ON'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DBUILD_SHARED_LIBS=ON'
           - build: 'avx2-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=ON'
           - build: 'avx-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_AVX2=OFF -DBUILD_SHARED_LIBS=ON'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX2=OFF -DBUILD_SHARED_LIBS=ON'
           - build: 'avx512-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_AVX512=ON -DBUILD_SHARED_LIBS=ON'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX512=ON -DBUILD_SHARED_LIBS=ON'
           - build: 'openblas-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_BLAS=ON -DBUILD_SHARED_LIBS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BLAS=ON -DBUILD_SHARED_LIBS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
           - build: 'kompute-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DBUILD_SHARED_LIBS=ON'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DBUILD_SHARED_LIBS=ON'
           - build: 'vulkan-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_VULKAN=ON -DBUILD_SHARED_LIBS=ON'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_VULKAN=ON -DBUILD_SHARED_LIBS=ON'
           - build: 'llvm-arm64'
             defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
           - build: 'msvc-arm64'
@@ -859,8 +865,9 @@ jobs:
         run: |
           mkdir build
           cd build
-          cmake .. -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=ON
-          cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1))
+          cmake .. -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=ON -DGGML_RPC=ON
+          cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1)) -t ggml
+          cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
 
       - name: Determine tag name
         id: tag
@@ -954,6 +961,7 @@ jobs:
           cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl7.dll" ./build/bin
           cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/svml_dispmd.dll" ./build/bin
           cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libmmd.dll" ./build/bin
+          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libiomp5md.dll" ./build/bin
           echo "cp oneAPI running time dll files to ./build/bin done"
           7z a llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip ./build/bin/*
 
@@ -965,19 +973,56 @@ jobs:
           name: llama-bin-win-sycl-x64.zip
 
   windows-latest-cmake-hip:
+    if: ${{ github.event.inputs.create_release != 'true' }}
+    runs-on: windows-latest
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Install
+        id: depends
+        run: |
+          $ErrorActionPreference = "Stop"
+          write-host "Downloading AMD HIP SDK Installer"
+          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
+          write-host "Installing AMD HIP SDK"
+          Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
+          write-host "Completed AMD HIP SDK installation"
+
+      - name: Verify ROCm
+        id: verify
+        run: |
+          & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
+
+      - name: Build
+        id: cmake_build
+        run: |
+          $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
+          $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
+          cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DGGML_RPC=ON
+          cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
+
+  windows-latest-cmake-hip-release:
+    if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
     runs-on: windows-latest
 
+    strategy:
+      matrix:
+        gpu_target: [gfx1100, gfx1101, gfx1030]
+
     steps:
       - name: Clone
         id: checkout
-        uses: actions/checkout@
+        uses: actions/checkout@v4
 
       - name: Install
         id: depends
         run: |
           $ErrorActionPreference = "Stop"
           write-host "Downloading AMD HIP SDK Installer"
-          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-
+          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
           write-host "Installing AMD HIP SDK"
           Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
           write-host "Completed AMD HIP SDK installation"
@@ -992,8 +1037,36 @@ jobs:
         run: |
           $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
           $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
-          cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIPBLAS=ON
-          cmake --build build
+          cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=${{ matrix.gpu_target }} -DGGML_RPC=ON
+          cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
+          md "build\bin\rocblas\library\"
+          cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\"
+          cp "${env:HIP_PATH}\bin\rocblas.dll" "build\bin\"
+          cp "${env:HIP_PATH}\bin\rocblas\library\*" "build\bin\rocblas\library\"
+
+      - name: Determine tag name
+        id: tag
+        shell: bash
+        run: |
+          BUILD_NUMBER="$(git rev-list --count HEAD)"
+          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
+          if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
+            echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
+          else
+            SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
+            echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
+          fi
+
+      - name: Pack artifacts
+        id: pack_artifacts
+        run: |
+          7z a llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip .\build\bin\*
+
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          path: llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip
+          name: llama-bin-win-hip-x64-${{ matrix.gpu_target }}.zip
 
   ios-xcode-build:
     runs-on: macos-latest
@@ -1058,6 +1131,7 @@ jobs:
       - macOS-latest-cmake
       - windows-latest-cmake
       - windows-latest-cmake-cuda
+      - windows-latest-cmake-hip-release
       - macOS-latest-cmake-arm64
      - macOS-latest-cmake-x64
 

package/src/llama.cpp/.github/workflows/close-issue.yml
CHANGED

@@ -3,6 +3,11 @@ on:
   schedule:
     - cron: "42 0 * * *"
 
+# Fine-grant permission
+# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
+permissions:
+  issues: write
+
 jobs:
   close-issues:
     runs-on: ubuntu-latest

package/src/llama.cpp/.github/workflows/docker.yml
CHANGED

@@ -15,11 +15,17 @@ on:
     branches:
       - master
     paths: ['.github/workflows/docker.yml', '.devops/*.Dockerfile', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal']
+  workflow_dispatch: # allows manual triggering, useful for debugging
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
   cancel-in-progress: true
 
+# Fine-grant permission
+# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
+permissions:
+  packages: write
+
 jobs:
   push_to_registry:
     name: Push Docker image to Docker Hub
@@ -37,15 +43,17 @@ jobs:
           - { tag: "light-cuda", dockerfile: ".devops/llama-cli-cuda.Dockerfile", platforms: "linux/amd64" }
           - { tag: "server-cuda", dockerfile: ".devops/llama-server-cuda.Dockerfile", platforms: "linux/amd64" }
           - { tag: "full-cuda", dockerfile: ".devops/full-cuda.Dockerfile", platforms: "linux/amd64" }
-
-
-
+          # Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete
+          #- { tag: "light-rocm", dockerfile: ".devops/llama-cli-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
+          #- { tag: "server-rocm", dockerfile: ".devops/llama-server-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
           #- { tag: "full-rocm", dockerfile: ".devops/full-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
           - { tag: "light-intel", dockerfile: ".devops/llama-cli-intel.Dockerfile", platforms: "linux/amd64" }
           - { tag: "server-intel", dockerfile: ".devops/llama-server-intel.Dockerfile", platforms: "linux/amd64" }
     steps:
       - name: Check out the repo
         uses: actions/checkout@v4
+        with:
+          fetch-depth: 0 # preserve git history, so we can determine the build number
 
       - name: Set up QEMU
         uses: docker/setup-qemu-action@v2
@@ -60,6 +68,34 @@ jobs:
           username: ${{ github.repository_owner }}
           password: ${{ secrets.GITHUB_TOKEN }}
 
+      - name: Determine tag name
+        id: tag
+        shell: bash
+        run: |
+          BUILD_NUMBER="$(git rev-list --count HEAD)"
+          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
+          REPO_OWNER="${GITHUB_REPOSITORY_OWNER@L}" # to lower case
+          REPO_NAME="${{ github.event.repository.name }}"
+
+          # determine tag name postfix (build number, commit hash)
+          if [[ "${{ env.GITHUB_BRANCH_NAME }}" == "master" ]]; then
+            TAG_POSTFIX="b${BUILD_NUMBER}"
+          else
+            SAFE_NAME=$(echo "${{ env.GITHUB_BRANCH_NAME }}" | tr '/' '-')
+            TAG_POSTFIX="${SAFE_NAME}-${SHORT_HASH}"
+          fi
+
+          # list all tags possible
+          TAGS=""
+          TAGS="${TAGS}ghcr.io/${REPO_OWNER}/${REPO_NAME}:${{ matrix.config.tag }},"
+          TAGS="${TAGS}ghcr.io/${REPO_OWNER}/${REPO_NAME}:${{ matrix.config.tag }}-${TAG_POSTFIX}"
+
+          echo "output_tags=$TAGS" >> $GITHUB_OUTPUT
+          echo "output_tags=$TAGS" # print out for debugging
+        env:
+          GITHUB_BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
+          GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'
+
       # https://github.com/jlumbroso/free-disk-space/tree/54081f138730dfa15788a46383842cd2f914a1be#example
       - name: Free Disk Space (Ubuntu)
         uses: jlumbroso/free-disk-space@main
@@ -77,40 +113,13 @@ jobs:
           docker-images: true
           swap-storage: true
 
-      - name:
-        id: tag
-        shell: bash
-        run: |
-          BUILD_NUMBER="$(git rev-list --count HEAD)"
-          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
-          if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
-            echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
-          else
-            SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
-            echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
-          fi
-
-      - name: Downcase github.repository_owner
-        run: |
-          echo "repository_owner_lowercase=${GITHUB_REPOSITORY_OWNER@L}" >> $GITHUB_ENV
-        env:
-          GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'
-
-      - name: Build and push Docker image (versioned)
+      - name: Build and push Docker image (tagged + versioned)
         if: github.event_name == 'push'
-        uses: docker/build-push-action@
+        uses: docker/build-push-action@v6
         with:
           context: .
           push: true
           platforms: ${{ matrix.config.platforms }}
-
-
-
-      - name: Build and push Docker image (tagged)
-        uses: docker/build-push-action@v4
-        with:
-          context: .
-          push: ${{ github.event_name == 'push' }}
-          platforms: ${{ matrix.config.platforms }}
-          tags: "ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }},ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }}-${{ steps.tag.outputs.name }}"
+          # tag list is generated from step above
+          tags: ${{ steps.tag.outputs.output_tags }}
           file: ${{ matrix.config.dockerfile }}

package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml
CHANGED

@@ -21,6 +21,13 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
   cancel-in-progress: true
 
+# Fine-grant permission
+# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
+permissions:
+  # https://github.com/DeterminateSystems/nix-installer-action?tab=readme-ov-file#with-flakehub
+  id-token: write
+  contents: read
+
 jobs:
   nix-build-aarch64:
     runs-on: ubuntu-latest

package/src/llama.cpp/.github/workflows/nix-ci.yml
CHANGED

@@ -12,6 +12,13 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
   cancel-in-progress: true
 
+# Fine-grant permission
+# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
+permissions:
+  # https://github.com/DeterminateSystems/nix-installer-action?tab=readme-ov-file#with-flakehub
+  id-token: write
+  contents: read
+
 jobs:
   nix-eval:
     strategy:

package/src/llama.cpp/.github/workflows/python-check-requirements.yml
CHANGED

@@ -6,15 +6,13 @@ on:
       - '.github/workflows/python-check-requirements.yml'
       - 'scripts/check-requirements.sh'
       - 'convert*.py'
-      - 'requirements
-      - 'requirements/*.txt'
+      - '**/requirements*.txt'
   pull_request:
     paths:
       - '.github/workflows/python-check-requirements.yml'
       - 'scripts/check-requirements.sh'
       - 'convert*.py'
-      - 'requirements
-      - 'requirements/*.txt'
+      - '**/requirements*.txt'
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}

package/src/llama.cpp/.github/workflows/python-type-check.yml
CHANGED

@@ -4,11 +4,13 @@ on:
   push:
     paths:
       - '.github/workflows/python-type-check.yml'
+      - 'pyrightconfig.json'
       - '**.py'
       - '**/requirements*.txt'
   pull_request:
     paths:
       - '.github/workflows/python-type-check.yml'
+      - 'pyrightconfig.json'
       - '**.py'
       - '**/requirements*.txt'
 
@@ -33,6 +35,6 @@ jobs:
       - name: Type-check with Pyright
         uses: jakebailey/pyright-action@v2
         with:
-          version: 1.1.
+          version: 1.1.382
           level: warning
           warnings: true

package/src/llama.cpp/.github/workflows/server.yml
CHANGED

@@ -20,6 +20,12 @@ on:
     types: [opened, synchronize, reopened]
     paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
 
+env:
+  LLAMA_LOG_COLORS: 1
+  LLAMA_LOG_PREFIX: 1
+  LLAMA_LOG_TIMESTAMPS: 1
+  LLAMA_LOG_VERBOSITY: 10
+
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}
   cancel-in-progress: true
@@ -173,6 +179,7 @@ jobs:
         if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }}
         run: |
           cd examples/server/tests
+          $env:PYTHONIOENCODING = ":replace"
           behave.exe --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp
 
       - name: Slow tests