@fugood/llama.node 0.3.17 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193)
  1. package/CMakeLists.txt +3 -1
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  7. package/bin/linux-cuda/x64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  9. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  10. package/bin/win32/arm64/llama-node.node +0 -0
  11. package/bin/win32/arm64/node.lib +0 -0
  12. package/bin/win32/x64/llama-node.node +0 -0
  13. package/bin/win32/x64/node.lib +0 -0
  14. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/arm64/node.lib +0 -0
  16. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  17. package/bin/win32-vulkan/x64/node.lib +0 -0
  18. package/lib/binding.ts +39 -2
  19. package/lib/index.js +132 -1
  20. package/lib/index.ts +203 -3
  21. package/package.json +2 -1
  22. package/src/EmbeddingWorker.cpp +1 -1
  23. package/src/LlamaCompletionWorker.cpp +366 -19
  24. package/src/LlamaCompletionWorker.h +30 -10
  25. package/src/LlamaContext.cpp +213 -5
  26. package/src/LlamaContext.h +12 -0
  27. package/src/common.hpp +15 -0
  28. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +133 -24
  29. package/src/llama.cpp/.github/workflows/build.yml +41 -762
  30. package/src/llama.cpp/.github/workflows/docker.yml +5 -2
  31. package/src/llama.cpp/.github/workflows/release.yml +716 -0
  32. package/src/llama.cpp/.github/workflows/server.yml +12 -12
  33. package/src/llama.cpp/CMakeLists.txt +5 -17
  34. package/src/llama.cpp/cmake/build-info.cmake +8 -2
  35. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
  36. package/src/llama.cpp/common/CMakeLists.txt +31 -3
  37. package/src/llama.cpp/common/arg.cpp +48 -29
  38. package/src/llama.cpp/common/chat.cpp +128 -106
  39. package/src/llama.cpp/common/chat.h +2 -0
  40. package/src/llama.cpp/common/common.cpp +37 -1
  41. package/src/llama.cpp/common/common.h +18 -9
  42. package/src/llama.cpp/common/llguidance.cpp +1 -0
  43. package/src/llama.cpp/common/minja/chat-template.hpp +9 -5
  44. package/src/llama.cpp/common/minja/minja.hpp +69 -36
  45. package/src/llama.cpp/common/regex-partial.cpp +204 -0
  46. package/src/llama.cpp/common/regex-partial.h +56 -0
  47. package/src/llama.cpp/common/sampling.cpp +57 -50
  48. package/src/llama.cpp/examples/CMakeLists.txt +2 -23
  49. package/src/llama.cpp/examples/embedding/embedding.cpp +2 -11
  50. package/src/llama.cpp/examples/parallel/parallel.cpp +86 -14
  51. package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
  52. package/src/llama.cpp/examples/training/finetune.cpp +96 -0
  53. package/src/llama.cpp/ggml/CMakeLists.txt +27 -0
  54. package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
  55. package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
  56. package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
  57. package/src/llama.cpp/ggml/include/ggml.h +10 -7
  58. package/src/llama.cpp/ggml/src/CMakeLists.txt +1 -1
  59. package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
  60. package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
  61. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +20 -13
  62. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +0 -2
  63. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +306 -6
  64. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +4 -13
  65. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +29 -16
  66. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +88 -5
  67. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -12
  68. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +264 -69
  69. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +501 -0
  70. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +0 -13
  71. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +0 -6
  72. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
  73. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +36 -11
  74. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +0 -2
  75. package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
  76. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
  77. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +41 -27
  78. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +29 -23
  79. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +9 -8
  80. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +121 -232
  81. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +7 -15
  82. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
  83. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
  84. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
  85. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
  86. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +0 -23
  87. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
  88. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +338 -166
  89. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
  90. package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
  91. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
  92. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +81 -70
  93. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +657 -193
  94. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +20 -0
  95. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +123 -29
  96. package/src/llama.cpp/ggml/src/ggml.c +29 -20
  97. package/src/llama.cpp/ggml/src/gguf.cpp +33 -33
  98. package/src/llama.cpp/include/llama.h +52 -11
  99. package/src/llama.cpp/requirements/requirements-all.txt +3 -3
  100. package/src/llama.cpp/scripts/xxd.cmake +1 -1
  101. package/src/llama.cpp/src/CMakeLists.txt +1 -0
  102. package/src/llama.cpp/src/llama-adapter.cpp +6 -0
  103. package/src/llama.cpp/src/llama-arch.cpp +3 -0
  104. package/src/llama.cpp/src/llama-batch.cpp +5 -1
  105. package/src/llama.cpp/src/llama-batch.h +2 -1
  106. package/src/llama.cpp/src/llama-chat.cpp +17 -7
  107. package/src/llama.cpp/src/llama-chat.h +1 -0
  108. package/src/llama.cpp/src/llama-context.cpp +389 -501
  109. package/src/llama.cpp/src/llama-context.h +44 -32
  110. package/src/llama.cpp/src/llama-cparams.h +1 -0
  111. package/src/llama.cpp/src/llama-graph.cpp +20 -38
  112. package/src/llama.cpp/src/llama-graph.h +12 -8
  113. package/src/llama.cpp/src/llama-kv-cache.cpp +1503 -389
  114. package/src/llama.cpp/src/llama-kv-cache.h +271 -85
  115. package/src/llama.cpp/src/llama-memory.h +11 -1
  116. package/src/llama.cpp/src/llama-model-loader.cpp +24 -15
  117. package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
  118. package/src/llama.cpp/src/llama-model-saver.h +37 -0
  119. package/src/llama.cpp/src/llama-model.cpp +316 -69
  120. package/src/llama.cpp/src/llama-model.h +8 -1
  121. package/src/llama.cpp/src/llama-quant.cpp +15 -13
  122. package/src/llama.cpp/src/llama-sampling.cpp +18 -6
  123. package/src/llama.cpp/src/llama-vocab.cpp +42 -4
  124. package/src/llama.cpp/src/llama-vocab.h +6 -0
  125. package/src/llama.cpp/src/llama.cpp +14 -0
  126. package/src/llama.cpp/tests/CMakeLists.txt +10 -2
  127. package/src/llama.cpp/tests/test-backend-ops.cpp +107 -47
  128. package/src/llama.cpp/tests/test-chat-template.cpp +10 -11
  129. package/src/llama.cpp/tests/test-chat.cpp +3 -1
  130. package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
  131. package/src/llama.cpp/tests/test-opt.cpp +33 -21
  132. package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
  133. package/src/llama.cpp/tests/test-sampling.cpp +1 -1
  134. package/src/llama.cpp/tools/CMakeLists.txt +39 -0
  135. package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +2 -2
  136. package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
  137. package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +495 -348
  138. package/src/llama.cpp/{examples → tools}/main/main.cpp +6 -9
  139. package/src/llama.cpp/{examples/llava → tools/mtmd}/CMakeLists.txt +1 -35
  140. package/src/llama.cpp/{examples/llava → tools/mtmd}/clip-impl.h +25 -5
  141. package/src/llama.cpp/{examples/llava → tools/mtmd}/clip.cpp +1440 -1349
  142. package/src/llama.cpp/tools/mtmd/clip.h +99 -0
  143. package/src/llama.cpp/{examples/llava → tools/mtmd}/mtmd-cli.cpp +70 -44
  144. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
  145. package/src/llama.cpp/{examples/llava → tools/mtmd}/mtmd.cpp +251 -281
  146. package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
  147. package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +4 -2
  148. package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +13 -76
  149. package/src/llama.cpp/{examples → tools}/rpc/rpc-server.cpp +70 -74
  150. package/src/llama.cpp/{examples → tools}/run/run.cpp +18 -4
  151. package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
  152. package/src/llama.cpp/{examples → tools}/server/server.cpp +291 -76
  153. package/src/llama.cpp/{examples → tools}/server/utils.hpp +377 -5
  154. package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
  155. package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
  156. package/src/llama.cpp/examples/infill/infill.cpp +0 -590
  157. package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
  158. package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
  159. package/src/llama.cpp/examples/llava/clip.h +0 -135
  160. package/src/llama.cpp/examples/llava/llava.cpp +0 -586
  161. package/src/llama.cpp/examples/llava/llava.h +0 -49
  162. package/src/llama.cpp/examples/llava/mtmd.h +0 -168
  163. package/src/llama.cpp/examples/llava/qwen2vl-test.cpp +0 -636
  164. /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
  165. /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
  166. /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
  167. /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
  168. /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
  169. /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
  170. /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
  171. /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
  172. /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
  173. /package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +0 -0
  174. /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
  175. /package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +0 -0
  176. /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
  177. /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
  178. /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
  179. /package/src/llama.cpp/{examples/llava → tools/mtmd}/deprecation-warning.cpp +0 -0
  180. /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
  181. /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
  182. /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
  183. /package/src/llama.cpp/{examples → tools}/rpc/CMakeLists.txt +0 -0
  184. /package/src/llama.cpp/{examples → tools}/run/CMakeLists.txt +0 -0
  185. /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
  186. /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
  187. /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
  188. /package/src/llama.cpp/{examples → tools}/server/httplib.h +0 -0
  189. /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
  190. /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
  191. /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
  192. /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
  193. /package/src/llama.cpp/{examples → tools}/tts/tts.cpp +0 -0
@@ -57,7 +57,8 @@ static helper_ctx_data helper_get_ctx_data(
57
57
  enum ggml_opt_loss_type loss_type = GGML_OPT_LOSS_TYPE_SUM) {
58
58
  std::vector<ggml_opt_dataset_t> datasets(ndata);
59
59
  for (int64_t ndata_shard = 1; ndata_shard <= ndata; ++ndata_shard) {
60
- ggml_opt_dataset_t dataset = ggml_opt_dataset_init(ne_datapoint, ne_label, ndata, ndata_shard);
60
+ ggml_opt_dataset_t dataset = ggml_opt_dataset_init(
61
+ GGML_TYPE_F32, GGML_TYPE_F32, ne_datapoint, ne_label, ndata, ndata_shard);
61
62
 
62
63
  float * data = ggml_get_data_f32(ggml_opt_dataset_data( dataset));
63
64
  float * labels = ggml_get_data_f32(ggml_opt_dataset_labels(dataset));
@@ -74,7 +75,8 @@ static helper_ctx_data helper_get_ctx_data(
74
75
  datasets[ndata_shard-1] = dataset;
75
76
  }
76
77
 
77
- ggml_opt_dataset_t dataset_unsupervised = ggml_opt_dataset_init(1, 0, ndata, /*ndata_shard =*/ 1);
78
+ ggml_opt_dataset_t dataset_unsupervised = ggml_opt_dataset_init(
79
+ GGML_TYPE_F32, GGML_TYPE_F32, 1, 0, ndata, /*ndata_shard =*/ 1);
78
80
 
79
81
  float * data = ggml_get_data_f32(ggml_opt_dataset_data(dataset_unsupervised));
80
82
 
@@ -113,7 +115,7 @@ static helper_ctx_data helper_get_ctx_data(
113
115
 
114
116
  struct ggml_tensor * weights = ggml_new_tensor_1d(ctx_static, GGML_TYPE_F32, 1);
115
117
  ggml_set_name(weights, "weights");
116
- ggml_set_param(ctx_static, weights);
118
+ ggml_set_param(weights);
117
119
 
118
120
  struct ggml_tensor * intermediary = ggml_add(ctx_compute, inputs, weights);
119
121
 
@@ -127,8 +129,11 @@ static helper_ctx_data helper_get_ctx_data(
127
129
  GGML_ASSERT(nbatch_logical % nbatch_physical == 0);
128
130
  const int32_t opt_period = nbatch_logical / nbatch_physical;
129
131
 
130
- struct ggml_opt_params opt_params = ggml_opt_default_params(backend_sched, ctx_compute, inputs, outputs, loss_type);
131
- opt_params.opt_period = opt_period;
132
+ struct ggml_opt_params opt_params = ggml_opt_default_params(backend_sched, loss_type);
133
+ opt_params.ctx_compute = ctx_compute;
134
+ opt_params.inputs = inputs;
135
+ opt_params.outputs = outputs;
136
+ opt_params.opt_period = opt_period;
132
137
  if (!optimizer_defaults) {
133
138
  opt_params.get_opt_pars = helper_get_test_opt_pars;
134
139
  }
@@ -264,8 +269,9 @@ static std::pair<int, int> test_grad(ggml_backend_sched_t backend_sched, ggml_ba
264
269
 
265
270
  for (int idata = 0; idata < ndata; ++idata) {
266
271
  const float idataf = idata;
272
+ ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
267
273
  ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
268
- ggml_opt_forward_backward(cd.opt_ctx, cd.result);
274
+ ggml_opt_eval(cd.opt_ctx, cd.result);
269
275
  ggml_backend_tensor_get(ggml_opt_grad_acc(cd.opt_ctx, cd.weights), grad_history.data() + idata, 0, sizeof(float));
270
276
  }
271
277
 
@@ -334,8 +340,9 @@ static std::pair<int, int> test_forward_backward(
334
340
  } else {
335
341
  for (int idata = 0; idata < ndata; ++idata) {
336
342
  const float idataf = idata;
343
+ ggml_opt_alloc(cd.opt_ctx, /*backward =*/ false);
337
344
  ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
338
- ggml_opt_forward(cd.opt_ctx, cd.result);
345
+ ggml_opt_eval(cd.opt_ctx, cd.result);
339
346
  ggml_backend_tensor_get(loss, loss_history.data() + idata, 0, sizeof(float));
340
347
  }
341
348
  }
@@ -367,7 +374,8 @@ static std::pair<int, int> test_forward_backward(
367
374
  float w0;
368
375
  ggml_backend_tensor_get(cd.weights, &w0, 0, sizeof(float));
369
376
  for (int i = 0; i < 10; ++i) {
370
- ggml_opt_forward_backward(cd.opt_ctx, nullptr);
377
+ ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
378
+ ggml_opt_eval(cd.opt_ctx, cd.result);
371
379
  }
372
380
  ggml_backend_tensor_set(cd.weights, &w0, 0, sizeof(float));
373
381
 
@@ -387,8 +395,9 @@ static std::pair<int, int> test_forward_backward(
387
395
  } else {
388
396
  for (int idata = 0; idata < ndata; ++idata) {
389
397
  const float idataf = idata;
398
+ ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
390
399
  ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
391
- ggml_opt_forward_backward(cd.opt_ctx, cd.result);
400
+ ggml_opt_eval(cd.opt_ctx, cd.result);
392
401
  ggml_backend_tensor_get(loss, loss_history.data() + idata, 0, sizeof(float));
393
402
  }
394
403
  }
@@ -492,14 +501,16 @@ static std::pair<int, int> test_idata_split(ggml_backend_sched_t backend_sched,
492
501
  int idata = 0;
493
502
  for (; idata < idata_split; ++idata) {
494
503
  const float idataf = idata;
504
+ ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
495
505
  ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
496
- ggml_opt_forward_backward(cd.opt_ctx, cd.result);
506
+ ggml_opt_eval(cd.opt_ctx, cd.result);
497
507
  ggml_backend_tensor_get(loss, loss_history.data() + idata, 0, sizeof(float));
498
508
  }
499
509
  for (; idata < ndata; ++idata) {
500
510
  const float idataf = idata;
511
+ ggml_opt_alloc(cd.opt_ctx, /*backward =*/ false);
501
512
  ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
502
- ggml_opt_forward(cd.opt_ctx, cd.result2);
513
+ ggml_opt_eval(cd.opt_ctx, cd.result2);
503
514
  ggml_backend_tensor_get(loss, loss_history.data() + idata, 0, sizeof(float));
504
515
  }
505
516
  }
@@ -573,7 +584,6 @@ static std::pair<int, int> test_gradient_accumulation(
573
584
 
574
585
  struct helper_ctx_data cd = helper_get_ctx_data(
575
586
  backend_sched, backend, /*init_opt_ctx =*/ true, /*optimizer_defaults =*/ false, /*nbatch_logical =*/ 6, nbatch_physical, loss_type);
576
- struct ggml_tensor * loss = ggml_opt_loss(cd.opt_ctx);
577
587
 
578
588
  std::vector<float> grad_history(ndata);
579
589
  for (int64_t idata = 0; idata < ndata; ++idata) {
@@ -584,15 +594,17 @@ static std::pair<int, int> test_gradient_accumulation(
584
594
  if (nbatch_physical == 1) {
585
595
  for (int idata = 0; idata < ndata; ++idata) {
586
596
  const float idataf = idata;
597
+ ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
587
598
  ggml_backend_tensor_set(cd.inputs, &idataf, 0, 1*sizeof(float));
588
- ggml_opt_forward_backward(cd.opt_ctx, cd.result);
599
+ ggml_opt_eval(cd.opt_ctx, cd.result);
589
600
  ggml_backend_tensor_get(ggml_opt_grad_acc(cd.opt_ctx, cd.weights), grad_history.data() + idata, 0, 1*sizeof(float));
590
601
  }
591
602
  } else if (nbatch_physical == 2) {
592
603
  for (int idata = 0; idata < ndata; idata += 2) {
593
604
  const float idataf[2] = {float(idata + 0), float(idata + 1)};
605
+ ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
594
606
  ggml_backend_tensor_set(cd.inputs, idataf, 0, 2*sizeof(float));
595
- ggml_opt_forward_backward(cd.opt_ctx, cd.result);
607
+ ggml_opt_eval(cd.opt_ctx, cd.result);
596
608
 
597
609
  grad_history[idata + 0] = 0.0f;
598
610
  ggml_backend_tensor_get(ggml_opt_grad_acc(cd.opt_ctx, cd.weights), grad_history.data() + idata + 1, 0, 1*sizeof(float));
@@ -617,7 +629,7 @@ static std::pair<int, int> test_gradient_accumulation(
617
629
  }
618
630
  subtest_ok = subtest_ok && almost_equal(grad_history[1], 2.0, atol);
619
631
  subtest_ok = subtest_ok && almost_equal(grad_history[3], 4.0, atol);
620
- subtest_ok = subtest_ok && almost_equal(grad_history[5], 0.0, atol);
632
+ subtest_ok = subtest_ok && almost_equal(grad_history[5], 6.0, atol);
621
633
  } else if (loss_type == GGML_OPT_LOSS_TYPE_MEAN) {
622
634
  if (nbatch_physical == 1) {
623
635
  subtest_ok = subtest_ok && almost_equal(grad_history[0], 1.0/ndata, atol);
@@ -630,7 +642,7 @@ static std::pair<int, int> test_gradient_accumulation(
630
642
  }
631
643
  subtest_ok = subtest_ok && almost_equal(grad_history[1], 2.0/ndata, atol);
632
644
  subtest_ok = subtest_ok && almost_equal(grad_history[3], 4.0/ndata, atol);
633
- subtest_ok = subtest_ok && almost_equal(grad_history[5], 0.0/ndata, atol);
645
+ subtest_ok = subtest_ok && almost_equal(grad_history[5], 6.0/ndata, atol);
634
646
  } else {
635
647
  GGML_ASSERT(false);
636
648
  }
@@ -692,7 +704,8 @@ static std::pair<int, int> test_regression(ggml_backend_sched_t backend_sched, g
692
704
  std::mt19937 gen(12345);
693
705
  std::normal_distribution<float> nd{0.0f, 0.1f};
694
706
 
695
- ggml_opt_dataset_t dataset = ggml_opt_dataset_init(1, 1, ndata_regression, ndata_regression);
707
+ ggml_opt_dataset_t dataset = ggml_opt_dataset_init(
708
+ GGML_TYPE_F32, GGML_TYPE_F32, 1, 1, ndata_regression, ndata_regression);
696
709
 
697
710
  float * data = ggml_get_data_f32(ggml_opt_dataset_data( dataset));
698
711
  float * labels = ggml_get_data_f32(ggml_opt_dataset_labels(dataset));
@@ -733,15 +746,14 @@ static std::pair<int, int> test_regression(ggml_backend_sched_t backend_sched, g
733
746
 
734
747
  struct ggml_tensor * a = ggml_new_tensor_1d(ctx_static, GGML_TYPE_F32, 1);
735
748
  ggml_set_name(a, "a");
736
- ggml_set_param(ctx_static, a);
749
+ ggml_set_param(a);
737
750
 
738
751
  struct ggml_tensor * b = ggml_new_tensor_1d(ctx_static, GGML_TYPE_F32, 1);
739
752
  ggml_set_name(b, "b");
740
- ggml_set_param(ctx_static, b);
753
+ ggml_set_param(b);
741
754
 
742
755
  struct ggml_tensor * f = ggml_add(ctx_compute, ggml_mul(ctx_compute, x, a), b);
743
756
  ggml_set_name(f, "f");
744
- ggml_set_param(ctx_static, f);
745
757
 
746
758
  ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx_static, backend);
747
759
  const float a0 = 1.0f;
@@ -853,7 +865,7 @@ int main(void) {
853
865
  backends_modded.insert(backends_modded.end(), backends.begin(), backends.end());
854
866
 
855
867
  ggml_backend_sched_t backend_sched = ggml_backend_sched_new(
856
- backends_modded.data(), nullptr, backends_modded.size(), GGML_DEFAULT_GRAPH_SIZE, false);
868
+ backends_modded.data(), nullptr, backends_modded.size(), GGML_DEFAULT_GRAPH_SIZE, false, true);
857
869
 
858
870
  printf("Backend %zu/%zu: %s\n", i + 1, dev_count, ggml_backend_dev_name(devs[i]));
859
871
  printf(" Device description: %s\n", ggml_backend_dev_description(devs[i]));
@@ -0,0 +1,288 @@
1
+ // Tests common_regex (esp. its partial final matches support).
2
+
3
+ #include "common.h"
4
+ #include "regex-partial.h"
5
+
6
+ #include <sstream>
7
+ #include <iostream>
8
+ #include <optional>
9
+
10
+ template <class T> static void assert_equals(const T & expected, const T & actual) {
11
+ if (expected != actual) {
12
+ std::cerr << "Expected: " << expected << std::endl;
13
+ std::cerr << " Actual: " << actual << std::endl;
14
+ std::cerr << std::flush;
15
+ throw std::runtime_error("Test failed");
16
+ }
17
+ }
18
+
19
+ struct test_case {
20
+ std::string pattern;
21
+ struct input_output {
22
+ std::string input;
23
+ common_regex_match output;
24
+ };
25
+ std::vector<input_output> inputs_outputs;
26
+ };
27
+
28
+ static std::string common_regex_match_type_name(common_regex_match_type type) {
29
+ switch (type) {
30
+ case COMMON_REGEX_MATCH_TYPE_NONE:
31
+ return "COMMON_REGEX_MATCH_TYPE_NONE";
32
+ case COMMON_REGEX_MATCH_TYPE_PARTIAL:
33
+ return "COMMON_REGEX_MATCH_TYPE_PARTIAL";
34
+ case COMMON_REGEX_MATCH_TYPE_FULL:
35
+ return "COMMON_REGEX_MATCH_TYPE_FULL";
36
+ }
37
+ return "?";
38
+ }
39
+
40
+ static void test_regex() {
41
+ printf("[%s]\n", __func__);
42
+ auto test = [](const test_case & test_case) {
43
+ common_regex cr(test_case.pattern);
44
+ std::cout << "Testing pattern: /" << test_case.pattern << "/\n";
45
+ // std::cout << " partial rev: " << cr.reversed_partial_pattern.str() << '\n';
46
+ for (const auto & input_output : test_case.inputs_outputs) {
47
+ std::cout << " Input: " << input_output.input << '\n';
48
+ auto m = cr.search(input_output.input, 0);
49
+ if (m != input_output.output) {
50
+ auto match_to_str = [&](const std::optional<common_regex_match> & m) {
51
+ std::ostringstream ss;
52
+ if (m->type == COMMON_REGEX_MATCH_TYPE_NONE) {
53
+ ss << "<no match>";
54
+ } else {
55
+ GGML_ASSERT(!input_output.output.groups.empty());
56
+ std::vector<std::string> parts;
57
+ for (const auto & g : m->groups) {
58
+ parts.push_back("{" + std::to_string(g.begin) + ", " + std::to_string(g.end) + "}");
59
+ }
60
+ ss << "{" << common_regex_match_type_name(m->type) << ", {" << string_join(parts, ", ") << "}}";
61
+ }
62
+ return ss.str();
63
+ };
64
+ std::cout << " Expected: " << match_to_str(input_output.output) << '\n';
65
+ std::cout << " Got: " << match_to_str(m) << '\n';
66
+ std::cout << " Inverted pattern: /" << regex_to_reversed_partial_regex(test_case.pattern) << "/\n";
67
+
68
+ throw std::runtime_error("Test failed");
69
+ }
70
+ }
71
+ };
72
+ test({
73
+ "a",
74
+ {
75
+ {"a", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 1}}}},
76
+ {"b", {COMMON_REGEX_MATCH_TYPE_NONE, {}}},
77
+ {"ab", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 1}}}},
78
+ {"ba", {COMMON_REGEX_MATCH_TYPE_FULL, {{1, 2}}}},
79
+ }
80
+ });
81
+ test({
82
+ "abcd",
83
+ {
84
+ {"abcd", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 4}}}},
85
+ {"abcde", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 4}}}},
86
+ {"abc", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 3}}}},
87
+ {"ab", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 2}}}},
88
+ {"a", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 1}}}},
89
+ {"d", {}},
90
+ {"bcd", {}},
91
+ {"cde", {}},
92
+ {"cd", {}},
93
+ {"yeah ab", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{5, 7}}}},
94
+ {"abbie", {}},
95
+ {"", {}},
96
+ }
97
+ });
98
+ test({
99
+ ".*?ab",
100
+ {
101
+ {"ab", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 2}}}},
102
+ {"abc", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 2}}}},
103
+ {"dab", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 3}}}},
104
+ {"dabc", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 3}}}},
105
+ {"da", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 2}}}},
106
+ {"d", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 1}}}},
107
+ }
108
+ });
109
+ test({
110
+ "a.*?b",
111
+ {
112
+ {"ab", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 2}}}},
113
+ {"abc", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 2}}}},
114
+ {"a b", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 3}}}},
115
+ {"a", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 1}}}},
116
+ {"argh", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 4}}}},
117
+ {"d", {}},
118
+ {"b", {}},
119
+ }
120
+ });
121
+ test({
122
+ "ab(?:cd){2,4}ef",
123
+ {
124
+ // {"ab", {COMMON_REGEX_MATCH_TYPE_PARTIAL, 0, {}}},
125
+ {"ab", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 2}}}},
126
+ {"abcd", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 4}}}},
127
+ {"abcde", {}},
128
+ {"abcdef", {}},
129
+ {"abcdcd", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 6}}}},
130
+ {"abcdcde", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 7}}}},
131
+ {"abcdcdef", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 8}}}},
132
+ {"abcdcdcdcdef", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 12}}}},
133
+ {"abcdcdcdcdcdef", {}},
134
+ {"abcde", {}},
135
+ {"yea", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{2, 3}}}},
136
+ }
137
+ });
138
+ test({
139
+ "a(?:rte| pure )fact",
140
+ {
141
+ {"a", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 1}}}},
142
+ {"art", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 3}}}},
143
+ {"artefa", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 6}}}},
144
+ {"fact", {}},
145
+ {"an arte", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{3, 7}}}},
146
+ {"artefact", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 8}}}},
147
+ {"an artefact", {COMMON_REGEX_MATCH_TYPE_FULL, {{3, 11}}}},
148
+ {"a pure", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 6}}}},
149
+ {"a pure fact", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 11}}}},
150
+ {"it's a pure fact", {COMMON_REGEX_MATCH_TYPE_FULL, {{5, 16}}}},
151
+ {"" , {}},
152
+ {"pure", {}},
153
+ {"pure fact", {}},
154
+ }
155
+ });
156
+ test({
157
+ "abc",
158
+ {
159
+ {" abcc", {COMMON_REGEX_MATCH_TYPE_FULL, {{1, 4}}}},
160
+ {"ab", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 2}}}},
161
+ {"abc", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 3}}}},
162
+ {" ab", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{1, 3}}}},
163
+ {"a", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 1}}}},
164
+ {"b", {}},
165
+ {"c", {}},
166
+ {"", {}},
167
+ }
168
+ });
169
+
170
+ test({
171
+ "(?:abc)?\\s*def",
172
+ {
173
+ {"ab", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 2}}}},
174
+ {"abc", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 3}}}},
175
+ {"abc ", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 4}}}},
176
+ {"abc d", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 5}}}},
177
+ {"abc de", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 6}}}},
178
+ {"abc def", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 7}}}},
179
+ {"abc defg", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 7}}}},
180
+ {"abc defgh", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 7}}}},
181
+ {"abcde", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 5}}}},
182
+ {"abcdefgh", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 6}}}},
183
+ {" d", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 2}}}},
184
+ {"def", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 3}}}},
185
+ }
186
+ });
187
+
188
+ test({
189
+ "a+b",
190
+ {
191
+ {"aaab", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 4}}}},
192
+ {"aaa", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 3}}}},
193
+ {"ab", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 2}}}},
194
+ }
195
+ });
196
+
197
+ test({
198
+ "(?:"
199
+ "(```(?:xml|json)?\\n\\s*)?" // match 1 (block_start)
200
+ "(" // match 2 (open_tag)
201
+ "<tool_call>"
202
+ "|<function_call>"
203
+ "|<tool>"
204
+ "|<tools>"
205
+ "|<response>"
206
+ "|<json>"
207
+ "|<xml>"
208
+ "|<JSON>"
209
+ ")?"
210
+ "(\\s*\\{\\s*\"name\"\\s*:)" // match 3 (named tool call)
211
+ ")"
212
+ "|<function=([^>]+)>" // match 4 (function name)
213
+ "|<function name=\"([^\"]+)\">", // match 5 (function name again)
214
+ {
215
+ {"{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 8}, {54, 54}, {54, 54}, {0, 8}, {54, 54}, {54, 54}}}},
216
+ {"<tool_call> {\"name", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 18}}}},
217
+ {"<tool_call>{\"name", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 17}}}},
218
+ {"Let's call something\n<tool_call>{\"name", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{21, 38}}}},
219
+ {"Ok then<tool_call>{\"name", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{7, 24}}}},
220
+ {"{\"name", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 6}}}},
221
+ {"Ok then{\"name", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{7, 13}}}},
222
+ {"<tool_call> {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 20}, {66, 66}, {0, 11}, {11, 20}, {66, 66}, {66, 66}}}},
223
+ {"<function_call> {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 24}, {70, 70}, {0, 15}, {15, 24}, {70, 70}, {70, 70}}}},
224
+ {"<function name=\"special_function\"> {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 34}, {89, 89}, {89, 89}, {89, 89}, {89, 89}, {16, 32}}}},
225
+ {"<function=all>", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 14}, {14, 14}, {14, 14}, {14, 14}, {10, 13}, {14, 14}}}},
226
+
227
+ }
228
+ });
229
+ }
230
+
231
+ static void test_regex_to_reversed_partial_regex() {
232
+ printf("[%s]\n", __func__);
233
+
234
+ assert_equals<std::string>(
235
+ "((?:(?:c)?b)?a)[\\s\\S]*",
236
+ regex_to_reversed_partial_regex("abc"));
237
+
238
+ assert_equals<std::string>(
239
+ "(a+)[\\s\\S]*",
240
+ regex_to_reversed_partial_regex("a+"));
241
+
242
+ assert_equals<std::string>(
243
+ "(a*)[\\s\\S]*",
244
+ regex_to_reversed_partial_regex("a*"));
245
+
246
+ assert_equals<std::string>(
247
+ "(a?)[\\s\\S]*",
248
+ regex_to_reversed_partial_regex("a?"));
249
+
250
+ assert_equals<std::string>(
251
+ "([a-z])[\\s\\S]*",
252
+ regex_to_reversed_partial_regex("[a-z]"));
253
+
254
+ assert_equals<std::string>(
255
+ "((?:\\w+)?[a-z])[\\s\\S]*",
256
+ regex_to_reversed_partial_regex("[a-z]\\w+"));
257
+
258
+ assert_equals<std::string>(
259
+ "((?:a|b))[\\s\\S]*",
260
+ regex_to_reversed_partial_regex("(?:a|b)"));
261
+ assert_equals<std::string>(
262
+ "((?:(?:(?:d)?c)?b)?a)[\\s\\S]*",
263
+ regex_to_reversed_partial_regex("abcd"));
264
+ assert_equals<std::string>(
265
+ "((?:b)?a*)[\\s\\S]*", // TODO: ((?:b)?a*+).* ??
266
+ regex_to_reversed_partial_regex("a*b"));
267
+ assert_equals<std::string>(
268
+ "((?:(?:b)?a)?.*)[\\s\\S]*",
269
+ regex_to_reversed_partial_regex(".*?ab"));
270
+ assert_equals<std::string>(
271
+ "((?:(?:b)?.*)?a)[\\s\\S]*",
272
+ regex_to_reversed_partial_regex("a.*?b"));
273
+ assert_equals<std::string>(
274
+ "((?:(?:d)?(?:(?:c)?b))?a)[\\s\\S]*",
275
+ regex_to_reversed_partial_regex("a(bc)d"));
276
+ assert_equals<std::string>(
277
+ "((?:(?:(?:c)?b|(?:e)?d))?a)[\\s\\S]*",
278
+ regex_to_reversed_partial_regex("a(bc|de)"));
279
+ assert_equals<std::string>(
280
+ "((?:(?:(?:(?:(?:c)?b?)?b?)?b)?b)?a)[\\s\\S]*",
281
+ regex_to_reversed_partial_regex("ab{2,4}c"));
282
+ }
283
+
284
+ int main() {
285
+ test_regex_to_reversed_partial_regex();
286
+ test_regex();
287
+ std::cout << "All tests passed.\n";
288
+ }
@@ -360,7 +360,7 @@ int main(void) {
360
360
  test_dry({0.2f, 0.2f, 0.2f, 0.2f, 0.2f}, {0, 1, 2, 3, 4, 0, 1}, {0.2f, 0.2f, 0.2f, 0.2f, 0.2f}, 1.0f, 1.1f, 4, 7, {});
361
361
 
362
362
  test_top_n_sigma({0.1f, 0.2f, 0.3f, 0.4f}, {0.571429f, 0.428571f, 0.0f, 0.0f}, 1.00f);
363
- test_top_n_sigma({0.1f, 0.2f, 0.3f, 0.4f}, {1.0f, 0.0f, 0.0f, 0.0f}, 0.00f);
363
+ test_top_n_sigma({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f, 0.3f, 0.2f, 0.1f}, 0.00f); // top_n_sigma == 0 now represents a no-op rather than greedy decoding as of PR#13345
364
364
  test_top_n_sigma({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f, 0.3f, 0.2f, 0.1f}, 3.00f);
365
365
 
366
366
  test_sampler_queue(10000, "k", 10000, 1.0f, 1.0f);
@@ -0,0 +1,39 @@
1
+ # dependencies
2
+
3
+ find_package(Threads REQUIRED)
4
+
5
+ # third-party
6
+
7
+ # ...
8
+
9
+ # flags
10
+
11
+ llama_add_compile_flags()
12
+
13
+ # tools
14
+
15
+ if (EMSCRIPTEN)
16
+ else()
17
+ add_subdirectory(batched-bench)
18
+ add_subdirectory(gguf-split)
19
+ add_subdirectory(imatrix)
20
+ add_subdirectory(llama-bench)
21
+ add_subdirectory(main)
22
+ add_subdirectory(perplexity)
23
+ add_subdirectory(quantize)
24
+ if (LLAMA_BUILD_SERVER)
25
+ add_subdirectory(server)
26
+ endif()
27
+ add_subdirectory(run)
28
+ add_subdirectory(tokenize)
29
+ add_subdirectory(tts)
30
+ add_subdirectory(mtmd)
31
+ if (GGML_RPC)
32
+ add_subdirectory(rpc)
33
+ endif()
34
+ if (NOT GGML_BACKEND_DL)
35
+ # these examples use the backends directly and cannot be built with dynamic loading
36
+ add_subdirectory(cvector-generator)
37
+ add_subdirectory(export-lora)
38
+ endif()
39
+ endif()
@@ -123,8 +123,8 @@ int main(int argc, char ** argv) {
123
123
 
124
124
  common_batch_clear(batch);
125
125
 
126
- for (int i = 0; i < pp; ++i) {
127
- for (int j = 0; j < (is_pp_shared ? 1 : pl); ++j) {
126
+ for (int j = 0; j < (is_pp_shared ? 1 : pl); ++j) {
127
+ for (int i = 0; i < pp; ++i) {
128
128
  common_batch_add(batch, 0, i, { j }, false);
129
129
  }
130
130
  }
@@ -24,7 +24,8 @@ static void print_usage(int, char ** argv) {
24
24
  LOG("\n %s \\\n"
25
25
  " -m model.gguf -f some-text.txt [-o imatrix.dat] [--process-output] \\\n"
26
26
  " [--no-ppl] [--chunk 123] [--output-frequency 10] [--save-frequency 0] \\\n"
27
- " [--in-file imatrix-prev-0.dat --in-file imatrix-prev-1.dat ...]\n" , argv[0]);
27
+ " [--in-file imatrix-prev-0.dat --in-file imatrix-prev-1.dat ...] \\\n"
28
+ " [--parse-special]\n" , argv[0]);
28
29
  LOG("\n");
29
30
  }
30
31
 
@@ -46,7 +47,7 @@ private:
46
47
  common_params m_params;
47
48
  std::mutex m_mutex;
48
49
  int m_last_call = 0;
49
- std::vector<float> m_src1_data;
50
+ std::vector<char> m_src1_data;
50
51
  std::vector<char> m_ids; // the expert ids from ggml_mul_mat_id
51
52
  };
52
53
 
@@ -93,11 +94,13 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
93
94
  const bool is_host = ggml_backend_buffer_is_host(src1->buffer);
94
95
 
95
96
  if (!is_host) {
96
- m_src1_data.resize(ggml_nelements(src1));
97
- ggml_backend_tensor_get(src1, m_src1_data.data(), 0, ggml_nbytes(src1));
97
+ const size_t src1_nbytes = ggml_nbytes(src1);
98
+ m_src1_data.resize(src1_nbytes);
99
+ ggml_backend_tensor_get(src1, m_src1_data.data(), 0, src1_nbytes);
98
100
  }
99
101
 
100
- const float * data = is_host ? (const float *) src1->data : m_src1_data.data();
102
+ const char * data = is_host ? (const char *) src1->data : m_src1_data.data();
103
+ GGML_ASSERT(src1->nb[0] == ggml_element_size(src1));
101
104
 
102
105
  // this has been adapted to the new format of storing merged experts in a single 3d tensor
103
106
  // ref: https://github.com/ggml-org/llama.cpp/pull/6387
@@ -144,7 +147,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
144
147
 
145
148
  const int64_t i11 = idx % src1->ne[1];
146
149
  const int64_t i12 = row;
147
- const float * x = (const float *)((const char *)data + i11*src1->nb[1] + i12*src1->nb[2]);
150
+ const float * x = (const float *)(data + i11*src1->nb[1] + i12*src1->nb[2]);
148
151
 
149
152
  for (int j = 0; j < (int)src1->ne[0]; ++j) {
150
153
  e.values[e_start + j] += x[j]*x[j];
@@ -180,7 +183,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
180
183
  ++e.ncall;
181
184
  LOG_DBGV(2, "%s[%d]: %32s, %s, %5d x %5d, %d\n", __func__, m_last_call, wname.c_str(), ggml_op_name(t->op), (int)src1->ne[0], (int)src1->ne[1], (int)src1->type);
182
185
  for (int row = 0; row < (int)src1->ne[1]; ++row) {
183
- const float * x = data + row * src1->ne[0];
186
+ const float * x = (const float *) (data + row * src1->nb[1]);
184
187
  for (int j = 0; j < (int)src1->ne[0]; ++j) {
185
188
  e.values[j] += x[j]*x[j];
186
189
  e.counts[j]++;
@@ -437,7 +440,7 @@ static bool compute_imatrix(llama_context * ctx, const common_params & params) {
437
440
  auto tim1 = std::chrono::high_resolution_clock::now();
438
441
  LOG_INF("%s: tokenizing the input ..\n", __func__);
439
442
 
440
- std::vector<llama_token> tokens = common_tokenize(ctx, params.prompt, true);
443
+ std::vector<llama_token> tokens = common_tokenize(ctx, params.prompt, true, params.parse_special);
441
444
 
442
445
  auto tim2 = std::chrono::high_resolution_clock::now();
443
446
  LOG_INF("%s: tokenization took %g ms\n",__func__,1e-3*std::chrono::duration_cast<std::chrono::microseconds>(tim2-tim1).count());
@@ -583,7 +586,6 @@ int main(int argc, char ** argv) {
583
586
  params.out_file = "imatrix.dat" ;
584
587
 
585
588
  params.n_ctx = 512;
586
- params.logits_all = true;
587
589
  params.escape = false;
588
590
 
589
591
  if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_IMATRIX, print_usage)) {