@fugood/llama.node 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187)
  1. package/CMakeLists.txt +1 -10
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/package.json +6 -4
  17. package/src/LlamaCompletionWorker.cpp +6 -6
  18. package/src/LlamaContext.cpp +7 -9
  19. package/src/common.hpp +2 -1
  20. package/src/llama.cpp/.github/workflows/build.yml +98 -24
  21. package/src/llama.cpp/.github/workflows/close-issue.yml +5 -0
  22. package/src/llama.cpp/.github/workflows/docker.yml +43 -34
  23. package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +7 -0
  24. package/src/llama.cpp/.github/workflows/nix-ci.yml +7 -0
  25. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +2 -4
  26. package/src/llama.cpp/.github/workflows/python-type-check.yml +3 -1
  27. package/src/llama.cpp/.github/workflows/server.yml +7 -0
  28. package/src/llama.cpp/CMakeLists.txt +20 -8
  29. package/src/llama.cpp/common/CMakeLists.txt +12 -10
  30. package/src/llama.cpp/common/arg.cpp +2006 -0
  31. package/src/llama.cpp/common/arg.h +77 -0
  32. package/src/llama.cpp/common/common.cpp +496 -1632
  33. package/src/llama.cpp/common/common.h +161 -63
  34. package/src/llama.cpp/common/console.cpp +3 -0
  35. package/src/llama.cpp/common/log.cpp +401 -0
  36. package/src/llama.cpp/common/log.h +66 -698
  37. package/src/llama.cpp/common/ngram-cache.cpp +3 -0
  38. package/src/llama.cpp/common/sampling.cpp +348 -350
  39. package/src/llama.cpp/common/sampling.h +62 -139
  40. package/src/llama.cpp/common/stb_image.h +5990 -6398
  41. package/src/llama.cpp/common/train.cpp +2 -0
  42. package/src/llama.cpp/docs/build.md +36 -1
  43. package/src/llama.cpp/examples/CMakeLists.txt +0 -1
  44. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +1 -2
  45. package/src/llama.cpp/examples/batched/batched.cpp +39 -55
  46. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +34 -44
  47. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +55 -52
  48. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +15 -15
  49. package/src/llama.cpp/examples/cvector-generator/pca.hpp +3 -13
  50. package/src/llama.cpp/examples/embedding/embedding.cpp +143 -87
  51. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +33 -33
  52. package/src/llama.cpp/examples/export-lora/export-lora.cpp +36 -35
  53. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +14 -39
  54. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +5 -0
  55. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +83 -0
  56. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +58 -39
  57. package/src/llama.cpp/examples/gritlm/gritlm.cpp +34 -27
  58. package/src/llama.cpp/examples/imatrix/imatrix.cpp +59 -62
  59. package/src/llama.cpp/examples/infill/infill.cpp +117 -132
  60. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +265 -58
  61. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +29 -22
  62. package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
  63. package/src/llama.cpp/examples/llava/clip.cpp +685 -150
  64. package/src/llama.cpp/examples/llava/clip.h +11 -2
  65. package/src/llama.cpp/examples/llava/llava-cli.cpp +47 -58
  66. package/src/llama.cpp/examples/llava/llava.cpp +110 -24
  67. package/src/llama.cpp/examples/llava/llava.h +2 -3
  68. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +323 -0
  69. package/src/llama.cpp/examples/llava/requirements.txt +1 -0
  70. package/src/llama.cpp/examples/lookahead/lookahead.cpp +42 -43
  71. package/src/llama.cpp/examples/lookup/lookup-create.cpp +10 -8
  72. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +23 -22
  73. package/src/llama.cpp/examples/lookup/lookup.cpp +40 -43
  74. package/src/llama.cpp/examples/main/main.cpp +210 -262
  75. package/src/llama.cpp/examples/parallel/parallel.cpp +49 -49
  76. package/src/llama.cpp/examples/passkey/passkey.cpp +42 -50
  77. package/src/llama.cpp/examples/perplexity/perplexity.cpp +187 -200
  78. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  79. package/src/llama.cpp/examples/quantize/quantize.cpp +27 -9
  80. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +2 -3
  81. package/src/llama.cpp/examples/retrieval/retrieval.cpp +49 -44
  82. package/src/llama.cpp/examples/rpc/rpc-server.cpp +24 -1
  83. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +32 -35
  84. package/src/llama.cpp/examples/server/CMakeLists.txt +3 -5
  85. package/src/llama.cpp/examples/server/server.cpp +1027 -1073
  86. package/src/llama.cpp/examples/server/tests/requirements.txt +2 -1
  87. package/src/llama.cpp/examples/server/utils.hpp +107 -105
  88. package/src/llama.cpp/examples/simple/simple.cpp +35 -41
  89. package/src/llama.cpp/examples/speculative/speculative.cpp +129 -103
  90. package/src/llama.cpp/examples/sycl/run-llama2.sh +10 -19
  91. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
  92. package/src/llama.cpp/examples/tokenize/tokenize.cpp +25 -27
  93. package/src/llama.cpp/ggml/CMakeLists.txt +14 -3
  94. package/src/llama.cpp/ggml/include/ggml-alloc.h +3 -3
  95. package/src/llama.cpp/ggml/include/ggml-backend.h +145 -60
  96. package/src/llama.cpp/ggml/include/ggml-blas.h +3 -3
  97. package/src/llama.cpp/ggml/include/ggml-cann.h +15 -19
  98. package/src/llama.cpp/ggml/include/ggml-cuda.h +16 -16
  99. package/src/llama.cpp/ggml/include/ggml-metal.h +5 -8
  100. package/src/llama.cpp/ggml/include/ggml-rpc.h +5 -5
  101. package/src/llama.cpp/ggml/include/ggml-sycl.h +8 -8
  102. package/src/llama.cpp/ggml/include/ggml-vulkan.h +7 -7
  103. package/src/llama.cpp/ggml/include/ggml.h +293 -186
  104. package/src/llama.cpp/ggml/src/CMakeLists.txt +86 -44
  105. package/src/llama.cpp/ggml/src/ggml-aarch64.c +2135 -1119
  106. package/src/llama.cpp/ggml/src/ggml-alloc.c +6 -0
  107. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +152 -70
  108. package/src/llama.cpp/ggml/src/{ggml-backend.c → ggml-backend.cpp} +606 -286
  109. package/src/llama.cpp/ggml/src/ggml-blas.cpp +9 -10
  110. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +4 -27
  111. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +32 -4
  112. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +179 -41
  113. package/src/llama.cpp/ggml/src/ggml-cann/common.h +1 -0
  114. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -1
  115. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +2 -0
  116. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +278 -0
  117. package/src/llama.cpp/ggml/src/ggml-cann.cpp +215 -216
  118. package/src/llama.cpp/ggml/src/ggml-common.h +20 -0
  119. package/src/llama.cpp/ggml/src/ggml-cpu-impl.h +614 -0
  120. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
  121. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +178 -0
  122. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +134 -0
  123. package/src/llama.cpp/ggml/src/ggml-impl.h +49 -603
  124. package/src/llama.cpp/ggml/src/ggml-kompute.cpp +4 -24
  125. package/src/llama.cpp/ggml/src/ggml-quants.c +972 -92
  126. package/src/llama.cpp/ggml/src/ggml-quants.h +15 -0
  127. package/src/llama.cpp/ggml/src/ggml-rpc.cpp +116 -66
  128. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +3 -0
  129. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +11 -0
  130. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +52 -0
  131. package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +99 -0
  132. package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +21 -0
  133. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +57 -57
  134. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +1 -1
  135. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +106 -106
  136. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +4 -4
  137. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +16 -3
  138. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +101 -0
  139. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +125 -0
  140. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +23 -0
  141. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +1 -1
  142. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +6 -3
  143. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +2 -0
  144. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -1
  145. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +71 -0
  146. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +21 -0
  147. package/src/llama.cpp/ggml/src/ggml-sycl.cpp +97 -169
  148. package/src/llama.cpp/ggml/src/ggml-vulkan.cpp +1508 -1124
  149. package/src/llama.cpp/ggml/src/ggml.c +3001 -1647
  150. package/src/llama.cpp/ggml/src/llamafile/sgemm.cpp +192 -0
  151. package/src/llama.cpp/ggml/src/vulkan-shaders/CMakeLists.txt +2 -0
  152. package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +88 -40
  153. package/src/llama.cpp/include/llama.h +241 -264
  154. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +112 -0
  155. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +46 -0
  156. package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +1 -1
  157. package/src/llama.cpp/src/llama-grammar.cpp +721 -122
  158. package/src/llama.cpp/src/llama-grammar.h +120 -15
  159. package/src/llama.cpp/src/llama-impl.h +156 -1
  160. package/src/llama.cpp/src/llama-sampling.cpp +1375 -303
  161. package/src/llama.cpp/src/llama-sampling.h +20 -47
  162. package/src/llama.cpp/src/llama-vocab.cpp +343 -120
  163. package/src/llama.cpp/src/llama-vocab.h +33 -17
  164. package/src/llama.cpp/src/llama.cpp +4247 -1525
  165. package/src/llama.cpp/src/unicode-data.cpp +6 -4
  166. package/src/llama.cpp/src/unicode-data.h +4 -4
  167. package/src/llama.cpp/src/unicode.cpp +15 -7
  168. package/src/llama.cpp/tests/CMakeLists.txt +3 -0
  169. package/src/llama.cpp/tests/test-arg-parser.cpp +131 -0
  170. package/src/llama.cpp/tests/test-backend-ops.cpp +1592 -289
  171. package/src/llama.cpp/tests/test-barrier.cpp +93 -0
  172. package/src/llama.cpp/tests/test-grad0.cpp +187 -70
  173. package/src/llama.cpp/tests/test-grammar-integration.cpp +23 -38
  174. package/src/llama.cpp/tests/test-grammar-parser.cpp +6 -4
  175. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +6 -4
  176. package/src/llama.cpp/tests/test-llama-grammar.cpp +9 -8
  177. package/src/llama.cpp/tests/test-log.cpp +39 -0
  178. package/src/llama.cpp/tests/test-quantize-fns.cpp +6 -0
  179. package/src/llama.cpp/tests/test-rope.cpp +1 -1
  180. package/src/llama.cpp/tests/test-sampling.cpp +157 -98
  181. package/src/llama.cpp/tests/test-tokenizer-0.cpp +55 -35
  182. package/patches/llama.patch +0 -22
  183. package/src/llama.cpp/.github/workflows/bench.yml +0 -310
  184. package/src/llama.cpp/common/grammar-parser.cpp +0 -536
  185. package/src/llama.cpp/common/grammar-parser.h +0 -29
  186. package/src/llama.cpp/examples/benchmark/CMakeLists.txt +0 -6
  187. package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -275
@@ -7,7 +7,7 @@
7
7
  #include <unordered_map>
8
8
  #include <unordered_set>
9
9
 
10
- const std::vector<std::pair<uint32_t, uint16_t>> unicode_ranges_flags = { // start, flags // last=next_start-1
10
+ const std::initializer_list<std::pair<uint32_t, uint16_t>> unicode_ranges_flags = { // start, flags // last=next_start-1
11
11
  {0x000000, 0x0080},
12
12
  {0x000020, 0x0008},
13
13
  {0x000021, 0x0020},
@@ -2311,7 +2311,8 @@ const std::unordered_set<uint32_t> unicode_set_whitespace = {
2311
2311
  0x003000,
2312
2312
  };
2313
2313
 
2314
- const std::unordered_map<uint32_t, uint32_t> unicode_map_lowercase = {
2314
+ // list is always in ascending order, to enable binary search
2315
+ const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_lowercase = {
2315
2316
  {0x000041, 0x000061},
2316
2317
  {0x000042, 0x000062},
2317
2318
  {0x000043, 0x000063},
@@ -3747,7 +3748,8 @@ const std::unordered_map<uint32_t, uint32_t> unicode_map_lowercase = {
3747
3748
  {0x01E921, 0x01E943},
3748
3749
  };
3749
3750
 
3750
- const std::unordered_map<uint32_t, uint32_t> unicode_map_uppercase = {
3751
+ // list is always in ascending order, to enable binary search
3752
+ const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_uppercase = {
3751
3753
  {0x000061, 0x000041},
3752
3754
  {0x000062, 0x000042},
3753
3755
  {0x000063, 0x000043},
@@ -5200,7 +5202,7 @@ const std::unordered_map<uint32_t, uint32_t> unicode_map_uppercase = {
5200
5202
  {0x01E943, 0x01E921},
5201
5203
  };
5202
5204
 
5203
- const std::vector<range_nfd> unicode_ranges_nfd = { // start, last, nfd
5205
+ const std::initializer_list<range_nfd> unicode_ranges_nfd = { // start, last, nfd
5204
5206
  {0x000000, 0x000000, 0x000000},
5205
5207
  {0x0000C0, 0x0000C5, 0x000041},
5206
5208
  {0x0000C7, 0x0000C7, 0x000043},
@@ -13,8 +13,8 @@ struct range_nfd {
13
13
 
14
14
  static const uint32_t MAX_CODEPOINTS = 0x110000;
15
15
 
16
- extern const std::vector<std::pair<uint32_t, uint16_t>> unicode_ranges_flags;
16
+ extern const std::initializer_list<std::pair<uint32_t, uint16_t>> unicode_ranges_flags;
17
17
  extern const std::unordered_set<uint32_t> unicode_set_whitespace;
18
- extern const std::unordered_map<uint32_t, uint32_t> unicode_map_lowercase;
19
- extern const std::unordered_map<uint32_t, uint32_t> unicode_map_uppercase;
20
- extern const std::vector<range_nfd> unicode_ranges_nfd;
18
+ extern const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_lowercase;
19
+ extern const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_uppercase;
20
+ extern const std::initializer_list<range_nfd> unicode_ranges_nfd;
@@ -5,6 +5,7 @@
5
5
  #include "unicode.h"
6
6
  #include "unicode-data.h"
7
7
 
8
+ #include <algorithm>
8
9
  #include <cassert>
9
10
  #include <cstddef>
10
11
  #include <cstdint>
@@ -122,11 +123,11 @@ uint32_t unicode_cpt_from_utf8(const std::string & utf8, size_t & offset) {
122
123
  static std::vector<codepoint_flags> unicode_cpt_flags_array() {
123
124
  std::vector<codepoint_flags> cpt_flags(MAX_CODEPOINTS, codepoint_flags::UNDEFINED);
124
125
 
125
- assert (unicode_ranges_flags.front().first == 0);
126
- assert (unicode_ranges_flags.back().first == MAX_CODEPOINTS);
126
+ assert (unicode_ranges_flags.begin()[0].first == 0);
127
+ assert (unicode_ranges_flags.begin()[unicode_ranges_flags.size()-1].first == MAX_CODEPOINTS);
127
128
  for (size_t i = 1; i < unicode_ranges_flags.size(); ++i) {
128
- const auto range_ini = unicode_ranges_flags[i-1]; // codepoint_ini, flags
129
- const auto range_end = unicode_ranges_flags[i]; // codepoint_end, flags
129
+ const auto range_ini = unicode_ranges_flags.begin()[i-1]; // codepoint_ini, flags
130
+ const auto range_end = unicode_ranges_flags.begin()[i]; // codepoint_end, flags
130
131
  for (uint32_t cpt = range_ini.first; cpt < range_end.first; ++cpt) {
131
132
  cpt_flags[cpt] = range_ini.second;
132
133
  }
@@ -596,7 +597,7 @@ std::vector<uint32_t> unicode_cpts_normalize_nfd(const std::vector<uint32_t> & c
596
597
  std::vector<uint32_t> result(cpts.size());
597
598
  for (size_t i = 0; i < cpts.size(); ++i) {
598
599
  const uint32_t cpt = cpts[i];
599
- auto it = std::upper_bound(unicode_ranges_nfd.cbegin(), unicode_ranges_nfd.cend(), cpt, comp) - 1;
600
+ auto it = std::upper_bound(unicode_ranges_nfd.begin(), unicode_ranges_nfd.end(), cpt, comp) - 1;
600
601
  result[i] = (it->first <= cpt && cpt <= it->last) ? it->nfd : cpt;
601
602
  }
602
603
  return result;
@@ -638,8 +639,15 @@ uint8_t unicode_utf8_to_byte(const std::string & utf8) {
638
639
  }
639
640
 
640
641
  uint32_t unicode_tolower(uint32_t cp) {
641
- auto it = unicode_map_lowercase.find(cp);
642
- return it == unicode_map_lowercase.end() ? cp : it->second;
642
+ // binary search
643
+ auto it = std::lower_bound(unicode_map_lowercase.begin(), unicode_map_lowercase.end(), cp,
644
+ [](const std::pair<uint32_t, uint32_t> & pair, uint32_t value) {
645
+ return pair.first < value;
646
+ });
647
+ if (it != unicode_map_lowercase.end() && it->first == cp) {
648
+ return it->second;
649
+ }
650
+ return cp; // Return the original code point if no lowercase mapping is found
643
651
  }
644
652
 
645
653
  std::vector<std::string> unicode_regex_split(const std::string & text, const std::vector<std::string> & regex_exprs) {
@@ -108,6 +108,8 @@ llama_test(test-tokenizer-1-spm NAME test-tokenizer-1-llama-spm ARGS ${CMAKE_CU
108
108
  #llama_test(test-tokenizer-1-spm NAME test-tokenizer-1-baichuan ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-baichuan.gguf)
109
109
 
110
110
  # llama_target_and_test(test-double-float.cpp) # SLOW
111
+ llama_target_and_test(test-log.cpp)
112
+ llama_target_and_test(test-arg-parser.cpp)
111
113
  llama_target_and_test(test-quantize-fns.cpp)
112
114
  llama_target_and_test(test-quantize-perf.cpp)
113
115
  llama_target_and_test(test-sampling.cpp)
@@ -117,6 +119,7 @@ llama_target_and_test(test-grammar-parser.cpp)
117
119
  llama_target_and_test(test-llama-grammar.cpp)
118
120
  llama_target_and_test(test-grammar-integration.cpp)
119
121
  llama_target_and_test(test-grad0.cpp)
122
+ llama_target_and_test(test-barrier.cpp)
120
123
  # llama_target_and_test(test-opt.cpp) # SLOW
121
124
  llama_target_and_test(test-backend-ops.cpp)
122
125
 
@@ -0,0 +1,131 @@
1
+ #include "arg.h"
2
+ #include "common.h"
3
+
4
+ #include <string>
5
+ #include <vector>
6
+ #include <sstream>
7
+ #include <unordered_set>
8
+
9
+ #undef NDEBUG
10
+ #include <cassert>
11
+
12
+ int main(void) {
13
+ gpt_params params;
14
+
15
+ printf("test-arg-parser: make sure there is no duplicated arguments in any examples\n\n");
16
+ for (int ex = 0; ex < LLAMA_EXAMPLE_COUNT; ex++) {
17
+ try {
18
+ auto ctx_arg = gpt_params_parser_init(params, (enum llama_example)ex);
19
+ std::unordered_set<std::string> seen_args;
20
+ std::unordered_set<std::string> seen_env_vars;
21
+ for (const auto & opt : ctx_arg.options) {
22
+ // check for args duplications
23
+ for (const auto & arg : opt.args) {
24
+ if (seen_args.find(arg) == seen_args.end()) {
25
+ seen_args.insert(arg);
26
+ } else {
27
+ fprintf(stderr, "test-arg-parser: found different handlers for the same argument: %s", arg);
28
+ exit(1);
29
+ }
30
+ }
31
+ // check for env var duplications
32
+ if (opt.env) {
33
+ if (seen_env_vars.find(opt.env) == seen_env_vars.end()) {
34
+ seen_env_vars.insert(opt.env);
35
+ } else {
36
+ fprintf(stderr, "test-arg-parser: found different handlers for the same env var: %s", opt.env);
37
+ exit(1);
38
+ }
39
+ }
40
+ }
41
+ } catch (std::exception & e) {
42
+ printf("%s\n", e.what());
43
+ assert(false);
44
+ }
45
+ }
46
+
47
+ auto list_str_to_char = [](std::vector<std::string> & argv) -> std::vector<char *> {
48
+ std::vector<char *> res;
49
+ for (auto & arg : argv) {
50
+ res.push_back(const_cast<char *>(arg.data()));
51
+ }
52
+ return res;
53
+ };
54
+
55
+ std::vector<std::string> argv;
56
+
57
+ printf("test-arg-parser: test invalid usage\n\n");
58
+
59
+ // missing value
60
+ argv = {"binary_name", "-m"};
61
+ assert(false == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
62
+
63
+ // wrong value (int)
64
+ argv = {"binary_name", "-ngl", "hello"};
65
+ assert(false == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
66
+
67
+ // wrong value (enum)
68
+ argv = {"binary_name", "-sm", "hello"};
69
+ assert(false == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
70
+
71
+ // argument that does not exist for this specific example (--draft cannot be used outside llama-speculative)
72
+ argv = {"binary_name", "--draft", "123"};
73
+ assert(false == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_SERVER));
74
+
75
+
76
+ printf("test-arg-parser: test valid usage\n\n");
77
+
78
+ argv = {"binary_name", "-m", "model_file.gguf"};
79
+ assert(true == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
80
+ assert(params.model == "model_file.gguf");
81
+
82
+ argv = {"binary_name", "-t", "1234"};
83
+ assert(true == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
84
+ assert(params.cpuparams.n_threads == 1234);
85
+
86
+ argv = {"binary_name", "--verbose"};
87
+ assert(true == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
88
+ assert(params.verbosity > 1);
89
+
90
+ argv = {"binary_name", "-m", "abc.gguf", "--predict", "6789", "--batch-size", "9090"};
91
+ assert(true == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
92
+ assert(params.model == "abc.gguf");
93
+ assert(params.n_predict == 6789);
94
+ assert(params.n_batch == 9090);
95
+
96
+ // --draft is only available in llama-speculative, so parsing it for that example must succeed
97
+ argv = {"binary_name", "--draft", "123"};
98
+ assert(true == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_SPECULATIVE));
99
+ assert(params.n_draft == 123);
100
+
101
+ // skip this part on windows, because setenv is not supported
102
+ #ifdef _WIN32
103
+ printf("test-arg-parser: skip on windows build\n");
104
+ #else
105
+ printf("test-arg-parser: test environment variables (valid + invalid usages)\n\n");
106
+
107
+ setenv("LLAMA_ARG_THREADS", "blah", true);
108
+ argv = {"binary_name"};
109
+ assert(false == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
110
+
111
+ setenv("LLAMA_ARG_MODEL", "blah.gguf", true);
112
+ setenv("LLAMA_ARG_THREADS", "1010", true);
113
+ argv = {"binary_name"};
114
+ assert(true == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
115
+ assert(params.model == "blah.gguf");
116
+ assert(params.cpuparams.n_threads == 1010);
117
+
118
+
119
+ printf("test-arg-parser: test environment variables being overwritten\n\n");
120
+
121
+ setenv("LLAMA_ARG_MODEL", "blah.gguf", true);
122
+ setenv("LLAMA_ARG_THREADS", "1010", true);
123
+ argv = {"binary_name", "-m", "overwritten.gguf"};
124
+ assert(true == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
125
+ assert(params.model == "overwritten.gguf");
126
+ assert(params.cpuparams.n_threads == 1010);
127
+ #endif // _WIN32
128
+
129
+
130
+ printf("test-arg-parser: all tests OK\n\n");
131
+ }