@fugood/llama.node 0.3.0 → 0.3.2

This diff shows the changes between publicly available package versions as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (187)
  1. package/CMakeLists.txt +1 -10
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/package.json +6 -4
  17. package/src/LlamaCompletionWorker.cpp +6 -6
  18. package/src/LlamaContext.cpp +7 -9
  19. package/src/common.hpp +2 -1
  20. package/src/llama.cpp/.github/workflows/build.yml +98 -24
  21. package/src/llama.cpp/.github/workflows/close-issue.yml +5 -0
  22. package/src/llama.cpp/.github/workflows/docker.yml +43 -34
  23. package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +7 -0
  24. package/src/llama.cpp/.github/workflows/nix-ci.yml +7 -0
  25. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +2 -4
  26. package/src/llama.cpp/.github/workflows/python-type-check.yml +3 -1
  27. package/src/llama.cpp/.github/workflows/server.yml +7 -0
  28. package/src/llama.cpp/CMakeLists.txt +20 -8
  29. package/src/llama.cpp/common/CMakeLists.txt +12 -10
  30. package/src/llama.cpp/common/arg.cpp +2006 -0
  31. package/src/llama.cpp/common/arg.h +77 -0
  32. package/src/llama.cpp/common/common.cpp +496 -1632
  33. package/src/llama.cpp/common/common.h +161 -63
  34. package/src/llama.cpp/common/console.cpp +3 -0
  35. package/src/llama.cpp/common/log.cpp +401 -0
  36. package/src/llama.cpp/common/log.h +66 -698
  37. package/src/llama.cpp/common/ngram-cache.cpp +3 -0
  38. package/src/llama.cpp/common/sampling.cpp +348 -350
  39. package/src/llama.cpp/common/sampling.h +62 -139
  40. package/src/llama.cpp/common/stb_image.h +5990 -6398
  41. package/src/llama.cpp/common/train.cpp +2 -0
  42. package/src/llama.cpp/docs/build.md +36 -1
  43. package/src/llama.cpp/examples/CMakeLists.txt +0 -1
  44. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +1 -2
  45. package/src/llama.cpp/examples/batched/batched.cpp +39 -55
  46. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +34 -44
  47. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +55 -52
  48. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +15 -15
  49. package/src/llama.cpp/examples/cvector-generator/pca.hpp +3 -13
  50. package/src/llama.cpp/examples/embedding/embedding.cpp +143 -87
  51. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +33 -33
  52. package/src/llama.cpp/examples/export-lora/export-lora.cpp +36 -35
  53. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +14 -39
  54. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +5 -0
  55. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +83 -0
  56. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +58 -39
  57. package/src/llama.cpp/examples/gritlm/gritlm.cpp +34 -27
  58. package/src/llama.cpp/examples/imatrix/imatrix.cpp +59 -62
  59. package/src/llama.cpp/examples/infill/infill.cpp +117 -132
  60. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +265 -58
  61. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +29 -22
  62. package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
  63. package/src/llama.cpp/examples/llava/clip.cpp +685 -150
  64. package/src/llama.cpp/examples/llava/clip.h +11 -2
  65. package/src/llama.cpp/examples/llava/llava-cli.cpp +47 -58
  66. package/src/llama.cpp/examples/llava/llava.cpp +110 -24
  67. package/src/llama.cpp/examples/llava/llava.h +2 -3
  68. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +323 -0
  69. package/src/llama.cpp/examples/llava/requirements.txt +1 -0
  70. package/src/llama.cpp/examples/lookahead/lookahead.cpp +42 -43
  71. package/src/llama.cpp/examples/lookup/lookup-create.cpp +10 -8
  72. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +23 -22
  73. package/src/llama.cpp/examples/lookup/lookup.cpp +40 -43
  74. package/src/llama.cpp/examples/main/main.cpp +210 -262
  75. package/src/llama.cpp/examples/parallel/parallel.cpp +49 -49
  76. package/src/llama.cpp/examples/passkey/passkey.cpp +42 -50
  77. package/src/llama.cpp/examples/perplexity/perplexity.cpp +187 -200
  78. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  79. package/src/llama.cpp/examples/quantize/quantize.cpp +27 -9
  80. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +2 -3
  81. package/src/llama.cpp/examples/retrieval/retrieval.cpp +49 -44
  82. package/src/llama.cpp/examples/rpc/rpc-server.cpp +24 -1
  83. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +32 -35
  84. package/src/llama.cpp/examples/server/CMakeLists.txt +3 -5
  85. package/src/llama.cpp/examples/server/server.cpp +1027 -1073
  86. package/src/llama.cpp/examples/server/tests/requirements.txt +2 -1
  87. package/src/llama.cpp/examples/server/utils.hpp +107 -105
  88. package/src/llama.cpp/examples/simple/simple.cpp +35 -41
  89. package/src/llama.cpp/examples/speculative/speculative.cpp +129 -103
  90. package/src/llama.cpp/examples/sycl/run-llama2.sh +10 -19
  91. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
  92. package/src/llama.cpp/examples/tokenize/tokenize.cpp +25 -27
  93. package/src/llama.cpp/ggml/CMakeLists.txt +14 -3
  94. package/src/llama.cpp/ggml/include/ggml-alloc.h +3 -3
  95. package/src/llama.cpp/ggml/include/ggml-backend.h +145 -60
  96. package/src/llama.cpp/ggml/include/ggml-blas.h +3 -3
  97. package/src/llama.cpp/ggml/include/ggml-cann.h +15 -19
  98. package/src/llama.cpp/ggml/include/ggml-cuda.h +16 -16
  99. package/src/llama.cpp/ggml/include/ggml-metal.h +5 -8
  100. package/src/llama.cpp/ggml/include/ggml-rpc.h +5 -5
  101. package/src/llama.cpp/ggml/include/ggml-sycl.h +8 -8
  102. package/src/llama.cpp/ggml/include/ggml-vulkan.h +7 -7
  103. package/src/llama.cpp/ggml/include/ggml.h +293 -186
  104. package/src/llama.cpp/ggml/src/CMakeLists.txt +86 -44
  105. package/src/llama.cpp/ggml/src/ggml-aarch64.c +2135 -1119
  106. package/src/llama.cpp/ggml/src/ggml-alloc.c +6 -0
  107. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +152 -70
  108. package/src/llama.cpp/ggml/src/{ggml-backend.c → ggml-backend.cpp} +606 -286
  109. package/src/llama.cpp/ggml/src/ggml-blas.cpp +9 -10
  110. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +4 -27
  111. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +32 -4
  112. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +179 -41
  113. package/src/llama.cpp/ggml/src/ggml-cann/common.h +1 -0
  114. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -1
  115. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +2 -0
  116. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +278 -0
  117. package/src/llama.cpp/ggml/src/ggml-cann.cpp +215 -216
  118. package/src/llama.cpp/ggml/src/ggml-common.h +20 -0
  119. package/src/llama.cpp/ggml/src/ggml-cpu-impl.h +614 -0
  120. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
  121. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +178 -0
  122. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +134 -0
  123. package/src/llama.cpp/ggml/src/ggml-impl.h +49 -603
  124. package/src/llama.cpp/ggml/src/ggml-kompute.cpp +4 -24
  125. package/src/llama.cpp/ggml/src/ggml-quants.c +972 -92
  126. package/src/llama.cpp/ggml/src/ggml-quants.h +15 -0
  127. package/src/llama.cpp/ggml/src/ggml-rpc.cpp +116 -66
  128. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +3 -0
  129. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +11 -0
  130. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +52 -0
  131. package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +99 -0
  132. package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +21 -0
  133. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +57 -57
  134. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +1 -1
  135. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +106 -106
  136. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +4 -4
  137. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +16 -3
  138. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +101 -0
  139. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +125 -0
  140. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +23 -0
  141. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +1 -1
  142. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +6 -3
  143. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +2 -0
  144. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -1
  145. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +71 -0
  146. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +21 -0
  147. package/src/llama.cpp/ggml/src/ggml-sycl.cpp +97 -169
  148. package/src/llama.cpp/ggml/src/ggml-vulkan.cpp +1508 -1124
  149. package/src/llama.cpp/ggml/src/ggml.c +3001 -1647
  150. package/src/llama.cpp/ggml/src/llamafile/sgemm.cpp +192 -0
  151. package/src/llama.cpp/ggml/src/vulkan-shaders/CMakeLists.txt +2 -0
  152. package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +88 -40
  153. package/src/llama.cpp/include/llama.h +241 -264
  154. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +112 -0
  155. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +46 -0
  156. package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +1 -1
  157. package/src/llama.cpp/src/llama-grammar.cpp +721 -122
  158. package/src/llama.cpp/src/llama-grammar.h +120 -15
  159. package/src/llama.cpp/src/llama-impl.h +156 -1
  160. package/src/llama.cpp/src/llama-sampling.cpp +1375 -303
  161. package/src/llama.cpp/src/llama-sampling.h +20 -47
  162. package/src/llama.cpp/src/llama-vocab.cpp +343 -120
  163. package/src/llama.cpp/src/llama-vocab.h +33 -17
  164. package/src/llama.cpp/src/llama.cpp +4247 -1525
  165. package/src/llama.cpp/src/unicode-data.cpp +6 -4
  166. package/src/llama.cpp/src/unicode-data.h +4 -4
  167. package/src/llama.cpp/src/unicode.cpp +15 -7
  168. package/src/llama.cpp/tests/CMakeLists.txt +3 -0
  169. package/src/llama.cpp/tests/test-arg-parser.cpp +131 -0
  170. package/src/llama.cpp/tests/test-backend-ops.cpp +1592 -289
  171. package/src/llama.cpp/tests/test-barrier.cpp +93 -0
  172. package/src/llama.cpp/tests/test-grad0.cpp +187 -70
  173. package/src/llama.cpp/tests/test-grammar-integration.cpp +23 -38
  174. package/src/llama.cpp/tests/test-grammar-parser.cpp +6 -4
  175. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +6 -4
  176. package/src/llama.cpp/tests/test-llama-grammar.cpp +9 -8
  177. package/src/llama.cpp/tests/test-log.cpp +39 -0
  178. package/src/llama.cpp/tests/test-quantize-fns.cpp +6 -0
  179. package/src/llama.cpp/tests/test-rope.cpp +1 -1
  180. package/src/llama.cpp/tests/test-sampling.cpp +157 -98
  181. package/src/llama.cpp/tests/test-tokenizer-0.cpp +55 -35
  182. package/patches/llama.patch +0 -22
  183. package/src/llama.cpp/.github/workflows/bench.yml +0 -310
  184. package/src/llama.cpp/common/grammar-parser.cpp +0 -536
  185. package/src/llama.cpp/common/grammar-parser.h +0 -29
  186. package/src/llama.cpp/examples/benchmark/CMakeLists.txt +0 -6
  187. package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -275
package/CMakeLists.txt CHANGED
@@ -62,16 +62,7 @@ if (VULKAN_SDK)
  find_package(Vulkan REQUIRED)
  endif()
 
- find_program(PATCH patch REQUIRED)
-
- add_custom_target(
- patch ALL
- COMMAND ${PATCH} -p1 -N < ${CMAKE_SOURCE_DIR}/patches/llama.patch || true
- WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/src/llama.cpp
- COMMENT "Applying patches"
- )
-
- set(LLAMA_STATIC ON CACHE BOOL "Build llama as static library")
+ set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries")
 
  add_subdirectory("src/llama.cpp")
 
  include_directories(
Binary files changed (prebuilt llama-node.node / node.lib binaries under package/bin/): no textual diff.
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
  "name": "@fugood/llama.node",
  "access": "public",
- "version": "0.3.0",
+ "version": "0.3.2",
  "description": "Llama.cpp for Node.js",
  "main": "lib/index.js",
  "scripts": {
@@ -11,7 +11,8 @@
  "build-native": "cmake-js compile",
  "clean": "rimraf build",
  "prepare": "husky",
- "commitlint": "commitlint --edit"
+ "commitlint": "commitlint --edit",
+ "release": "release-it"
  },
  "repository": {
  "type": "git",
@@ -51,11 +52,12 @@
  "@commitlint/cli": "^19.3.0",
  "@commitlint/config-conventional": "^19.2.2",
  "@types/jest": "^29.5.12",
- "@types/node": "^20.12.7",
+ "@types/node": "^22.0.0",
  "cmake-js": "^7.3.0",
  "husky": "^9.0.11",
  "jest": "^29.7.0",
- "rimraf": "^5.0.5",
+ "release-it": "^17.7.0",
+ "rimraf": "^6.0.1",
  "typescript": "^5.4.5",
  "wait-for-expect": "^3.0.2"
  },
package/src/LlamaCompletionWorker.cpp CHANGED
@@ -59,13 +59,13 @@ void LlamaCompletionWorker::Execute() {
  size_t n_cur = 0;
  size_t n_input = 0;
  const auto model = _sess->model();
- const bool add_bos = llama_should_add_bos_token(model);
+ const bool add_bos = llama_add_bos_token(model);
  auto ctx = _sess->context();
 
- llama_set_rng_seed(ctx, _params.seed);
+ auto sparams = llama_sampler_chain_default_params();
 
- LlamaCppSampling sampling{llama_sampling_init(_params.sparams),
- llama_sampling_free};
+ LlamaCppSampling sampling{gpt_sampler_init(model, _params.sparams),
+ gpt_sampler_free};
 
  std::vector<llama_token> prompt_tokens =
  ::llama_tokenize(ctx, _params.prompt, add_bos);
@@ -109,8 +109,8 @@ void LlamaCompletionWorker::Execute() {
  }
  // sample the next token
  const llama_token new_token_id =
- llama_sampling_sample(sampling.get(), ctx, nullptr);
- llama_sampling_accept(sampling.get(), ctx, new_token_id, true);
+ gpt_sampler_sample(sampling.get(), ctx, -1);
+ gpt_sampler_accept(sampling.get(), new_token_id, true);
  // prepare the next batch
  embd->emplace_back(new_token_id);
  auto token = llama_token_to_piece(ctx, new_token_id);
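These two hunks move the completion worker from the removed llama_sampling_* helpers to the newer gpt_sampler_* API in llama.cpp's common library; the explicit llama_set_rng_seed call goes away because the seed is now carried in the sampling parameters. A minimal sketch of the per-token step after the migration, using only calls visible in this diff (decode and batch handling elided):

// Hedged sketch: smpl and ctx are assumed to be created as in the diff above.
// idx = -1 samples from the logits of the most recently decoded token;
// gpt_sampler_accept advances the sampler's grammar/penalty state.
static llama_token sample_next(gpt_sampler * smpl, llama_context * ctx) {
  const llama_token id = gpt_sampler_sample(smpl, ctx, -1);
  gpt_sampler_accept(smpl, id, /* accept_grammar = */ true);
  return id;
}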
package/src/LlamaContext.cpp CHANGED
@@ -75,7 +75,7 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
  params.embedding = get_option<bool>(options, "embedding", false);
  params.n_ctx = get_option<int32_t>(options, "n_ctx", 512);
  params.n_batch = get_option<int32_t>(options, "n_batch", 2048);
- params.n_threads =
+ params.cpuparams.n_threads =
  get_option<int32_t>(options, "n_threads", cpu_get_num_math() / 2);
  params.n_gpu_layers = get_option<int32_t>(options, "n_gpu_layers", -1);
  params.use_mlock = get_option<bool>(options, "use_mlock", false);
@@ -86,16 +86,14 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
  llama_backend_init();
  llama_numa_init(params.numa);
 
- llama_model *model;
- llama_context *ctx;
- std::tie(model, ctx) = llama_init_from_gpt_params(params);
+ auto result = llama_init_from_gpt_params(params);
 
- if (model == nullptr || ctx == nullptr) {
+ if (result.model == nullptr || result.context == nullptr) {
  Napi::TypeError::New(env, "Failed to load model")
  .ThrowAsJavaScriptException();
  }
 
- _sess = std::make_shared<LlamaSession>(model, ctx, params);
+ _sess = std::make_shared<LlamaSession>(result.model, result.context, params);
  _info = gpt_params_get_system_info(params);
  }
 
@@ -167,11 +165,11 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
  params.sparams.penalty_present =
  get_option<float>(options, "penalty_present", 0.00f);
  params.sparams.penalize_nl = get_option<bool>(options, "penalize_nl", false);
- params.sparams.typical_p = get_option<float>(options, "typical_p", 1.00f);
- params.ignore_eos = get_option<float>(options, "ignore_eos", false);
+ params.sparams.typ_p = get_option<float>(options, "typical_p", 1.00f);
+ params.sparams.ignore_eos = get_option<float>(options, "ignore_eos", false);
  params.sparams.grammar = get_option<std::string>(options, "grammar", "");
  params.n_keep = get_option<int32_t>(options, "n_keep", 0);
- params.seed = get_option<int32_t>(options, "seed", LLAMA_DEFAULT_SEED);
+ params.sparams.seed = get_option<int32_t>(options, "seed", LLAMA_DEFAULT_SEED);
  std::vector<std::string> stop_words;
  if (options.Has("stop") && options.Get("stop").IsArray()) {
  auto stop_words_array = options.Get("stop").As<Napi::Array>();
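The hunks above follow two upstream llama.cpp API changes: llama_init_from_gpt_params now returns a struct with named model and context members rather than a tuple, and several options (n_threads, typical_p, ignore_eos, seed) moved under cpuparams/sparams. A minimal sketch of the new initialization path, with a hypothetical model path and plain error handling standing in for the binding's TypeError:

// Hedged sketch; "model.gguf" is a placeholder path, not part of the package.
#include "common/common.h"

static bool init_session(llama_model *& model, llama_context *& ctx) {
  gpt_params params;
  params.model = "model.gguf";               // hypothetical local GGUF file
  params.cpuparams.n_threads = 4;            // n_threads now lives under cpuparams
  params.sparams.seed = LLAMA_DEFAULT_SEED;  // seed moved into the sampling params

  auto result = llama_init_from_gpt_params(params);
  if (result.model == nullptr || result.context == nullptr) {
    return false;                            // the binding throws a JS TypeError here
  }
  model = result.model;
  ctx = result.context;
  return true;
}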
package/src/common.hpp CHANGED
@@ -1,6 +1,7 @@
  #pragma once
 
  #include "common/common.h"
+ #include "common/sampling.h"
  #include "llama.h"
  #include <memory>
  #include <mutex>
@@ -12,7 +13,7 @@
 
  typedef std::unique_ptr<llama_model, decltype(&llama_free_model)> LlamaCppModel;
  typedef std::unique_ptr<llama_context, decltype(&llama_free)> LlamaCppContext;
- typedef std::unique_ptr<llama_sampling_context, decltype(&llama_sampling_free)>
+ typedef std::unique_ptr<gpt_sampler, decltype(&gpt_sampler_free)>
  LlamaCppSampling;
  typedef std::unique_ptr<llama_batch, decltype(&llama_batch_free)> LlamaCppBatch;
 
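The updated LlamaCppSampling alias keeps the same RAII convention as the other handles: a std::unique_ptr whose deleter is the matching free function, now gpt_sampler_free. A small sketch of the intended usage, assuming only the names from this diff:

// Hedged sketch: the second constructor argument is the deleter, so the sampler
// is released automatically when `sampling` goes out of scope.
void with_sampler(llama_model * model, const gpt_params & params) {
  LlamaCppSampling sampling{gpt_sampler_init(model, params.sparams), gpt_sampler_free};
  // ... pass sampling.get() to gpt_sampler_sample / gpt_sampler_accept ...
}   // gpt_sampler_free runs here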
package/src/llama.cpp/.github/workflows/build.yml CHANGED
@@ -19,10 +19,18 @@ concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true
 
+ # Fine-grant permission
+ # https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
+ permissions:
+ contents: write # for creating release
+
  env:
  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
  GGML_NLOOP: 3
  GGML_N_THREADS: 1
+ LLAMA_LOG_COLORS: 1
+ LLAMA_LOG_PREFIX: 1
+ LLAMA_LOG_TIMESTAMPS: 1
 
  jobs:
  macOS-latest-cmake-arm64:
@@ -47,7 +55,7 @@ jobs:
  sysctl -a
  mkdir build
  cd build
- cmake -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL_EMBED_LIBRARY=ON -DLLAMA_CURL=ON -DBUILD_SHARED_LIBS=OFF ..
+ cmake -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL_EMBED_LIBRARY=ON -DLLAMA_CURL=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF ..
  cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
 
  - name: Test
@@ -105,7 +113,7 @@ jobs:
  sysctl -a
  # Metal is disabled due to intermittent failures with Github runners not having a GPU:
  # https://github.com/ggerganov/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
- cmake -B build -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL=OFF -DLLAMA_CURL=ON -DBUILD_SHARED_LIBS=OFF
+ cmake -B build -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL=OFF -DLLAMA_CURL=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF
  cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
 
  - name: Test
@@ -222,7 +230,7 @@ jobs:
  run: |
  mkdir build
  cd build
- cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON -DBUILD_SHARED_LIBS=OFF
+ cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF
  cmake --build . --config Release -j $(nproc)
 
  - name: Test
@@ -375,7 +383,7 @@ jobs:
  steps:
  - name: Clone
  id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
 
  - name: Dependencies
  id: depends
@@ -401,7 +409,7 @@ jobs:
  continue-on-error: true
 
  steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
 
  - name: add oneAPI to apt
  shell: bash
@@ -442,7 +450,7 @@ jobs:
  continue-on-error: true
 
  steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
 
  - name: add oneAPI to apt
  shell: bash
@@ -546,7 +554,7 @@ jobs:
  steps:
  - name: Clone
  id: checkout
- uses: actions/checkout@v1
+ uses: actions/checkout@v4
 
  - name: Dependencies
  id: depends
@@ -576,7 +584,7 @@ jobs:
  steps:
  - name: Clone
  id: checkout
- uses: actions/checkout@v1
+ uses: actions/checkout@v4
 
  - name: Dependencies
  id: depends
@@ -610,7 +618,7 @@ jobs:
  steps:
  - name: Clone
  id: checkout
- uses: actions/checkout@v1
+ uses: actions/checkout@v4
 
  - name: Dependencies
  id: depends
@@ -696,22 +704,20 @@ jobs:
  strategy:
  matrix:
  include:
- - build: 'rpc-x64'
- defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=ON'
  - build: 'noavx-x64'
- defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DBUILD_SHARED_LIBS=ON'
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DBUILD_SHARED_LIBS=ON'
  - build: 'avx2-x64'
- defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=ON'
  - build: 'avx-x64'
- defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_AVX2=OFF -DBUILD_SHARED_LIBS=ON'
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX2=OFF -DBUILD_SHARED_LIBS=ON'
  - build: 'avx512-x64'
- defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_AVX512=ON -DBUILD_SHARED_LIBS=ON'
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX512=ON -DBUILD_SHARED_LIBS=ON'
  - build: 'openblas-x64'
- defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_BLAS=ON -DBUILD_SHARED_LIBS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BLAS=ON -DBUILD_SHARED_LIBS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
  - build: 'kompute-x64'
- defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DBUILD_SHARED_LIBS=ON'
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DBUILD_SHARED_LIBS=ON'
  - build: 'vulkan-x64'
- defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_VULKAN=ON -DBUILD_SHARED_LIBS=ON'
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_VULKAN=ON -DBUILD_SHARED_LIBS=ON'
  - build: 'llvm-arm64'
  defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
  - build: 'msvc-arm64'
@@ -859,8 +865,9 @@ jobs:
  run: |
  mkdir build
  cd build
- cmake .. -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=ON
- cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1))
+ cmake .. -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=ON -DGGML_RPC=ON
+ cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1)) -t ggml
+ cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
 
  - name: Determine tag name
  id: tag
@@ -954,6 +961,7 @@ jobs:
  cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl7.dll" ./build/bin
  cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/svml_dispmd.dll" ./build/bin
  cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libmmd.dll" ./build/bin
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libiomp5md.dll" ./build/bin
  echo "cp oneAPI running time dll files to ./build/bin done"
  7z a llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip ./build/bin/*
 
@@ -965,19 +973,56 @@ jobs:
  name: llama-bin-win-sycl-x64.zip
 
  windows-latest-cmake-hip:
+ if: ${{ github.event.inputs.create_release != 'true' }}
+ runs-on: windows-latest
+
+ steps:
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v4
+
+ - name: Install
+ id: depends
+ run: |
+ $ErrorActionPreference = "Stop"
+ write-host "Downloading AMD HIP SDK Installer"
+ Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
+ write-host "Installing AMD HIP SDK"
+ Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
+ write-host "Completed AMD HIP SDK installation"
+
+ - name: Verify ROCm
+ id: verify
+ run: |
+ & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
+
+ - name: Build
+ id: cmake_build
+ run: |
+ $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
+ $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
+ cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DGGML_RPC=ON
+ cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
+
+ windows-latest-cmake-hip-release:
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
  runs-on: windows-latest
 
+ strategy:
+ matrix:
+ gpu_target: [gfx1100, gfx1101, gfx1030]
+
  steps:
  - name: Clone
  id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
 
  - name: Install
  id: depends
  run: |
  $ErrorActionPreference = "Stop"
  write-host "Downloading AMD HIP SDK Installer"
- Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-23.Q4-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
+ Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
  write-host "Installing AMD HIP SDK"
  Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
  write-host "Completed AMD HIP SDK installation"
@@ -992,8 +1037,36 @@ jobs:
  run: |
  $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
  $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
- cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIPBLAS=ON
- cmake --build build --config Release
+ cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=${{ matrix.gpu_target }} -DGGML_RPC=ON
+ cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
+ md "build\bin\rocblas\library\"
+ cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\"
+ cp "${env:HIP_PATH}\bin\rocblas.dll" "build\bin\"
+ cp "${env:HIP_PATH}\bin\rocblas\library\*" "build\bin\rocblas\library\"
+
+ - name: Determine tag name
+ id: tag
+ shell: bash
+ run: |
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
+ else
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
+ fi
+
+ - name: Pack artifacts
+ id: pack_artifacts
+ run: |
+ 7z a llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip .\build\bin\*
+
+ - name: Upload artifacts
+ uses: actions/upload-artifact@v4
+ with:
+ path: llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip
+ name: llama-bin-win-hip-x64-${{ matrix.gpu_target }}.zip
 
  ios-xcode-build:
  runs-on: macos-latest
@@ -1058,6 +1131,7 @@ jobs:
  - macOS-latest-cmake
  - windows-latest-cmake
  - windows-latest-cmake-cuda
+ - windows-latest-cmake-hip-release
  - macOS-latest-cmake-arm64
  - macOS-latest-cmake-x64
 
package/src/llama.cpp/.github/workflows/close-issue.yml CHANGED
@@ -3,6 +3,11 @@ on:
  schedule:
  - cron: "42 0 * * *"
 
+ # Fine-grant permission
+ # https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
+ permissions:
+ issues: write
+
  jobs:
  close-issues:
  runs-on: ubuntu-latest
package/src/llama.cpp/.github/workflows/docker.yml CHANGED
@@ -15,11 +15,17 @@ on:
  branches:
  - master
  paths: ['.github/workflows/docker.yml', '.devops/*.Dockerfile', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal']
+ workflow_dispatch: # allows manual triggering, useful for debugging
 
  concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true
 
+ # Fine-grant permission
+ # https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
+ permissions:
+ packages: write
+
  jobs:
  push_to_registry:
  name: Push Docker image to Docker Hub
@@ -37,15 +43,17 @@ jobs:
  - { tag: "light-cuda", dockerfile: ".devops/llama-cli-cuda.Dockerfile", platforms: "linux/amd64" }
  - { tag: "server-cuda", dockerfile: ".devops/llama-server-cuda.Dockerfile", platforms: "linux/amd64" }
  - { tag: "full-cuda", dockerfile: ".devops/full-cuda.Dockerfile", platforms: "linux/amd64" }
- - { tag: "light-rocm", dockerfile: ".devops/llama-cli-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
- - { tag: "server-rocm", dockerfile: ".devops/llama-server-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
- # Note: the full-rocm image is failing due to a "no space left on device" error. It is disabled for now to allow the workflow to complete.
+ # Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete
+ #- { tag: "light-rocm", dockerfile: ".devops/llama-cli-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
+ #- { tag: "server-rocm", dockerfile: ".devops/llama-server-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
  #- { tag: "full-rocm", dockerfile: ".devops/full-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
  - { tag: "light-intel", dockerfile: ".devops/llama-cli-intel.Dockerfile", platforms: "linux/amd64" }
  - { tag: "server-intel", dockerfile: ".devops/llama-server-intel.Dockerfile", platforms: "linux/amd64" }
  steps:
  - name: Check out the repo
  uses: actions/checkout@v4
+ with:
+ fetch-depth: 0 # preserve git history, so we can determine the build number
 
  - name: Set up QEMU
  uses: docker/setup-qemu-action@v2
@@ -60,6 +68,34 @@ jobs:
  username: ${{ github.repository_owner }}
  password: ${{ secrets.GITHUB_TOKEN }}
 
+ - name: Determine tag name
+ id: tag
+ shell: bash
+ run: |
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
+ REPO_OWNER="${GITHUB_REPOSITORY_OWNER@L}" # to lower case
+ REPO_NAME="${{ github.event.repository.name }}"
+
+ # determine tag name postfix (build number, commit hash)
+ if [[ "${{ env.GITHUB_BRANCH_NAME }}" == "master" ]]; then
+ TAG_POSTFIX="b${BUILD_NUMBER}"
+ else
+ SAFE_NAME=$(echo "${{ env.GITHUB_BRANCH_NAME }}" | tr '/' '-')
+ TAG_POSTFIX="${SAFE_NAME}-${SHORT_HASH}"
+ fi
+
+ # list all tags possible
+ TAGS=""
+ TAGS="${TAGS}ghcr.io/${REPO_OWNER}/${REPO_NAME}:${{ matrix.config.tag }},"
+ TAGS="${TAGS}ghcr.io/${REPO_OWNER}/${REPO_NAME}:${{ matrix.config.tag }}-${TAG_POSTFIX}"
+
+ echo "output_tags=$TAGS" >> $GITHUB_OUTPUT
+ echo "output_tags=$TAGS" # print out for debugging
+ env:
+ GITHUB_BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
+ GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'
+
  # https://github.com/jlumbroso/free-disk-space/tree/54081f138730dfa15788a46383842cd2f914a1be#example
  - name: Free Disk Space (Ubuntu)
  uses: jlumbroso/free-disk-space@main
@@ -77,40 +113,13 @@ jobs:
  docker-images: true
  swap-storage: true
 
- - name: Determine tag name
- id: tag
- shell: bash
- run: |
- BUILD_NUMBER="$(git rev-list --count HEAD)"
- SHORT_HASH="$(git rev-parse --short=7 HEAD)"
- if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
- echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
- else
- SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
- echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
- fi
-
- - name: Downcase github.repository_owner
- run: |
- echo "repository_owner_lowercase=${GITHUB_REPOSITORY_OWNER@L}" >> $GITHUB_ENV
- env:
- GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'
-
- - name: Build and push Docker image (versioned)
+ - name: Build and push Docker image (tagged + versioned)
  if: github.event_name == 'push'
- uses: docker/build-push-action@v4
+ uses: docker/build-push-action@v6
  with:
  context: .
  push: true
  platforms: ${{ matrix.config.platforms }}
- tags: "ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
- file: ${{ matrix.config.dockerfile }}
-
- - name: Build and push Docker image (tagged)
- uses: docker/build-push-action@v4
- with:
- context: .
- push: ${{ github.event_name == 'push' }}
- platforms: ${{ matrix.config.platforms }}
- tags: "ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }},ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }}-${{ steps.tag.outputs.name }}"
+ # tag list is generated from step above
+ tags: ${{ steps.tag.outputs.output_tags }}
  file: ${{ matrix.config.dockerfile }}
package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml CHANGED
@@ -21,6 +21,13 @@ concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true
 
+ # Fine-grant permission
+ # https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
+ permissions:
+ # https://github.com/DeterminateSystems/nix-installer-action?tab=readme-ov-file#with-flakehub
+ id-token: write
+ contents: read
+
  jobs:
  nix-build-aarch64:
  runs-on: ubuntu-latest
package/src/llama.cpp/.github/workflows/nix-ci.yml CHANGED
@@ -12,6 +12,13 @@ concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true
 
+ # Fine-grant permission
+ # https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
+ permissions:
+ # https://github.com/DeterminateSystems/nix-installer-action?tab=readme-ov-file#with-flakehub
+ id-token: write
+ contents: read
+
  jobs:
  nix-eval:
  strategy:
package/src/llama.cpp/.github/workflows/python-check-requirements.yml CHANGED
@@ -6,15 +6,13 @@ on:
  - '.github/workflows/python-check-requirements.yml'
  - 'scripts/check-requirements.sh'
  - 'convert*.py'
- - 'requirements.txt'
- - 'requirements/*.txt'
+ - '**/requirements*.txt'
  pull_request:
  paths:
  - '.github/workflows/python-check-requirements.yml'
  - 'scripts/check-requirements.sh'
  - 'convert*.py'
- - 'requirements.txt'
- - 'requirements/*.txt'
+ - '**/requirements*.txt'
 
  concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
package/src/llama.cpp/.github/workflows/python-type-check.yml CHANGED
@@ -4,11 +4,13 @@ on:
  push:
  paths:
  - '.github/workflows/python-type-check.yml'
+ - 'pyrightconfig.json'
  - '**.py'
  - '**/requirements*.txt'
  pull_request:
  paths:
  - '.github/workflows/python-type-check.yml'
+ - 'pyrightconfig.json'
  - '**.py'
  - '**/requirements*.txt'
 
@@ -33,6 +35,6 @@ jobs:
  - name: Type-check with Pyright
  uses: jakebailey/pyright-action@v2
  with:
- version: 1.1.370
+ version: 1.1.382
  level: warning
  warnings: true
package/src/llama.cpp/.github/workflows/server.yml CHANGED
@@ -20,6 +20,12 @@ on:
  types: [opened, synchronize, reopened]
  paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
 
+ env:
+ LLAMA_LOG_COLORS: 1
+ LLAMA_LOG_PREFIX: 1
+ LLAMA_LOG_TIMESTAMPS: 1
+ LLAMA_LOG_VERBOSITY: 10
+
  concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
@@ -173,6 +179,7 @@ jobs:
  if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }}
  run: |
  cd examples/server/tests
+ $env:PYTHONIOENCODING = ":replace"
  behave.exe --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp
 
  - name: Slow tests