@fugood/llama.node 0.3.17 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +3 -1
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +39 -2
- package/lib/index.js +132 -1
- package/lib/index.ts +203 -3
- package/package.json +2 -1
- package/src/EmbeddingWorker.cpp +1 -1
- package/src/LlamaCompletionWorker.cpp +366 -19
- package/src/LlamaCompletionWorker.h +30 -10
- package/src/LlamaContext.cpp +213 -5
- package/src/LlamaContext.h +12 -0
- package/src/common.hpp +15 -0
- package/src/llama.cpp/.github/workflows/build-linux-cross.yml +133 -24
- package/src/llama.cpp/.github/workflows/build.yml +41 -762
- package/src/llama.cpp/.github/workflows/docker.yml +5 -2
- package/src/llama.cpp/.github/workflows/release.yml +716 -0
- package/src/llama.cpp/.github/workflows/server.yml +12 -12
- package/src/llama.cpp/CMakeLists.txt +5 -17
- package/src/llama.cpp/cmake/build-info.cmake +8 -2
- package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
- package/src/llama.cpp/common/CMakeLists.txt +31 -3
- package/src/llama.cpp/common/arg.cpp +48 -29
- package/src/llama.cpp/common/chat.cpp +128 -106
- package/src/llama.cpp/common/chat.h +2 -0
- package/src/llama.cpp/common/common.cpp +37 -1
- package/src/llama.cpp/common/common.h +18 -9
- package/src/llama.cpp/common/llguidance.cpp +1 -0
- package/src/llama.cpp/common/minja/chat-template.hpp +9 -5
- package/src/llama.cpp/common/minja/minja.hpp +69 -36
- package/src/llama.cpp/common/regex-partial.cpp +204 -0
- package/src/llama.cpp/common/regex-partial.h +56 -0
- package/src/llama.cpp/common/sampling.cpp +57 -50
- package/src/llama.cpp/examples/CMakeLists.txt +2 -23
- package/src/llama.cpp/examples/embedding/embedding.cpp +2 -11
- package/src/llama.cpp/examples/parallel/parallel.cpp +86 -14
- package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/training/finetune.cpp +96 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +27 -0
- package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
- package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
- package/src/llama.cpp/ggml/include/ggml.h +10 -7
- package/src/llama.cpp/ggml/src/CMakeLists.txt +1 -1
- package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +20 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +0 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +306 -6
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +4 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +29 -16
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +88 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -12
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +264 -69
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +501 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +0 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +0 -6
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +36 -11
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +0 -2
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
- package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +41 -27
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +29 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +9 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +121 -232
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +7 -15
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +0 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +338 -166
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
- package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +81 -70
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +657 -193
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +20 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +123 -29
- package/src/llama.cpp/ggml/src/ggml.c +29 -20
- package/src/llama.cpp/ggml/src/gguf.cpp +33 -33
- package/src/llama.cpp/include/llama.h +52 -11
- package/src/llama.cpp/requirements/requirements-all.txt +3 -3
- package/src/llama.cpp/scripts/xxd.cmake +1 -1
- package/src/llama.cpp/src/CMakeLists.txt +1 -0
- package/src/llama.cpp/src/llama-adapter.cpp +6 -0
- package/src/llama.cpp/src/llama-arch.cpp +3 -0
- package/src/llama.cpp/src/llama-batch.cpp +5 -1
- package/src/llama.cpp/src/llama-batch.h +2 -1
- package/src/llama.cpp/src/llama-chat.cpp +17 -7
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-context.cpp +389 -501
- package/src/llama.cpp/src/llama-context.h +44 -32
- package/src/llama.cpp/src/llama-cparams.h +1 -0
- package/src/llama.cpp/src/llama-graph.cpp +20 -38
- package/src/llama.cpp/src/llama-graph.h +12 -8
- package/src/llama.cpp/src/llama-kv-cache.cpp +1503 -389
- package/src/llama.cpp/src/llama-kv-cache.h +271 -85
- package/src/llama.cpp/src/llama-memory.h +11 -1
- package/src/llama.cpp/src/llama-model-loader.cpp +24 -15
- package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
- package/src/llama.cpp/src/llama-model-saver.h +37 -0
- package/src/llama.cpp/src/llama-model.cpp +316 -69
- package/src/llama.cpp/src/llama-model.h +8 -1
- package/src/llama.cpp/src/llama-quant.cpp +15 -13
- package/src/llama.cpp/src/llama-sampling.cpp +18 -6
- package/src/llama.cpp/src/llama-vocab.cpp +42 -4
- package/src/llama.cpp/src/llama-vocab.h +6 -0
- package/src/llama.cpp/src/llama.cpp +14 -0
- package/src/llama.cpp/tests/CMakeLists.txt +10 -2
- package/src/llama.cpp/tests/test-backend-ops.cpp +107 -47
- package/src/llama.cpp/tests/test-chat-template.cpp +10 -11
- package/src/llama.cpp/tests/test-chat.cpp +3 -1
- package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
- package/src/llama.cpp/tests/test-opt.cpp +33 -21
- package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
- package/src/llama.cpp/tests/test-sampling.cpp +1 -1
- package/src/llama.cpp/tools/CMakeLists.txt +39 -0
- package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +2 -2
- package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
- package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +495 -348
- package/src/llama.cpp/{examples → tools}/main/main.cpp +6 -9
- package/src/llama.cpp/{examples/llava → tools/mtmd}/CMakeLists.txt +1 -35
- package/src/llama.cpp/{examples/llava → tools/mtmd}/clip-impl.h +25 -5
- package/src/llama.cpp/{examples/llava → tools/mtmd}/clip.cpp +1440 -1349
- package/src/llama.cpp/tools/mtmd/clip.h +99 -0
- package/src/llama.cpp/{examples/llava → tools/mtmd}/mtmd-cli.cpp +70 -44
- package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
- package/src/llama.cpp/{examples/llava → tools/mtmd}/mtmd.cpp +251 -281
- package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
- package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +4 -2
- package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +13 -76
- package/src/llama.cpp/{examples → tools}/rpc/rpc-server.cpp +70 -74
- package/src/llama.cpp/{examples → tools}/run/run.cpp +18 -4
- package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
- package/src/llama.cpp/{examples → tools}/server/server.cpp +291 -76
- package/src/llama.cpp/{examples → tools}/server/utils.hpp +377 -5
- package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
- package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/infill/infill.cpp +0 -590
- package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
- package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
- package/src/llama.cpp/examples/llava/clip.h +0 -135
- package/src/llama.cpp/examples/llava/llava.cpp +0 -586
- package/src/llama.cpp/examples/llava/llava.h +0 -49
- package/src/llama.cpp/examples/llava/mtmd.h +0 -168
- package/src/llama.cpp/examples/llava/qwen2vl-test.cpp +0 -636
- /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples/llava → tools/mtmd}/deprecation-warning.cpp +0 -0
- /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/rpc/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/run/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
- /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/server/httplib.h +0 -0
- /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tts/tts.cpp +0 -0
|
@@ -57,7 +57,8 @@ static helper_ctx_data helper_get_ctx_data(
|
|
|
57
57
|
enum ggml_opt_loss_type loss_type = GGML_OPT_LOSS_TYPE_SUM) {
|
|
58
58
|
std::vector<ggml_opt_dataset_t> datasets(ndata);
|
|
59
59
|
for (int64_t ndata_shard = 1; ndata_shard <= ndata; ++ndata_shard) {
|
|
60
|
-
ggml_opt_dataset_t dataset = ggml_opt_dataset_init(
|
|
60
|
+
ggml_opt_dataset_t dataset = ggml_opt_dataset_init(
|
|
61
|
+
GGML_TYPE_F32, GGML_TYPE_F32, ne_datapoint, ne_label, ndata, ndata_shard);
|
|
61
62
|
|
|
62
63
|
float * data = ggml_get_data_f32(ggml_opt_dataset_data( dataset));
|
|
63
64
|
float * labels = ggml_get_data_f32(ggml_opt_dataset_labels(dataset));
|
|
@@ -74,7 +75,8 @@ static helper_ctx_data helper_get_ctx_data(
|
|
|
74
75
|
datasets[ndata_shard-1] = dataset;
|
|
75
76
|
}
|
|
76
77
|
|
|
77
|
-
ggml_opt_dataset_t dataset_unsupervised = ggml_opt_dataset_init(
|
|
78
|
+
ggml_opt_dataset_t dataset_unsupervised = ggml_opt_dataset_init(
|
|
79
|
+
GGML_TYPE_F32, GGML_TYPE_F32, 1, 0, ndata, /*ndata_shard =*/ 1);
|
|
78
80
|
|
|
79
81
|
float * data = ggml_get_data_f32(ggml_opt_dataset_data(dataset_unsupervised));
|
|
80
82
|
|
|
@@ -113,7 +115,7 @@ static helper_ctx_data helper_get_ctx_data(
|
|
|
113
115
|
|
|
114
116
|
struct ggml_tensor * weights = ggml_new_tensor_1d(ctx_static, GGML_TYPE_F32, 1);
|
|
115
117
|
ggml_set_name(weights, "weights");
|
|
116
|
-
ggml_set_param(
|
|
118
|
+
ggml_set_param(weights);
|
|
117
119
|
|
|
118
120
|
struct ggml_tensor * intermediary = ggml_add(ctx_compute, inputs, weights);
|
|
119
121
|
|
|
@@ -127,8 +129,11 @@ static helper_ctx_data helper_get_ctx_data(
|
|
|
127
129
|
GGML_ASSERT(nbatch_logical % nbatch_physical == 0);
|
|
128
130
|
const int32_t opt_period = nbatch_logical / nbatch_physical;
|
|
129
131
|
|
|
130
|
-
struct ggml_opt_params opt_params = ggml_opt_default_params(backend_sched,
|
|
131
|
-
opt_params.
|
|
132
|
+
struct ggml_opt_params opt_params = ggml_opt_default_params(backend_sched, loss_type);
|
|
133
|
+
opt_params.ctx_compute = ctx_compute;
|
|
134
|
+
opt_params.inputs = inputs;
|
|
135
|
+
opt_params.outputs = outputs;
|
|
136
|
+
opt_params.opt_period = opt_period;
|
|
132
137
|
if (!optimizer_defaults) {
|
|
133
138
|
opt_params.get_opt_pars = helper_get_test_opt_pars;
|
|
134
139
|
}
|
|
@@ -264,8 +269,9 @@ static std::pair<int, int> test_grad(ggml_backend_sched_t backend_sched, ggml_ba
|
|
|
264
269
|
|
|
265
270
|
for (int idata = 0; idata < ndata; ++idata) {
|
|
266
271
|
const float idataf = idata;
|
|
272
|
+
ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
|
|
267
273
|
ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
|
|
268
|
-
|
|
274
|
+
ggml_opt_eval(cd.opt_ctx, cd.result);
|
|
269
275
|
ggml_backend_tensor_get(ggml_opt_grad_acc(cd.opt_ctx, cd.weights), grad_history.data() + idata, 0, sizeof(float));
|
|
270
276
|
}
|
|
271
277
|
|
|
@@ -334,8 +340,9 @@ static std::pair<int, int> test_forward_backward(
|
|
|
334
340
|
} else {
|
|
335
341
|
for (int idata = 0; idata < ndata; ++idata) {
|
|
336
342
|
const float idataf = idata;
|
|
343
|
+
ggml_opt_alloc(cd.opt_ctx, /*backward =*/ false);
|
|
337
344
|
ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
|
|
338
|
-
|
|
345
|
+
ggml_opt_eval(cd.opt_ctx, cd.result);
|
|
339
346
|
ggml_backend_tensor_get(loss, loss_history.data() + idata, 0, sizeof(float));
|
|
340
347
|
}
|
|
341
348
|
}
|
|
@@ -367,7 +374,8 @@ static std::pair<int, int> test_forward_backward(
|
|
|
367
374
|
float w0;
|
|
368
375
|
ggml_backend_tensor_get(cd.weights, &w0, 0, sizeof(float));
|
|
369
376
|
for (int i = 0; i < 10; ++i) {
|
|
370
|
-
|
|
377
|
+
ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
|
|
378
|
+
ggml_opt_eval(cd.opt_ctx, cd.result);
|
|
371
379
|
}
|
|
372
380
|
ggml_backend_tensor_set(cd.weights, &w0, 0, sizeof(float));
|
|
373
381
|
|
|
@@ -387,8 +395,9 @@ static std::pair<int, int> test_forward_backward(
|
|
|
387
395
|
} else {
|
|
388
396
|
for (int idata = 0; idata < ndata; ++idata) {
|
|
389
397
|
const float idataf = idata;
|
|
398
|
+
ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
|
|
390
399
|
ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
|
|
391
|
-
|
|
400
|
+
ggml_opt_eval(cd.opt_ctx, cd.result);
|
|
392
401
|
ggml_backend_tensor_get(loss, loss_history.data() + idata, 0, sizeof(float));
|
|
393
402
|
}
|
|
394
403
|
}
|
|
@@ -492,14 +501,16 @@ static std::pair<int, int> test_idata_split(ggml_backend_sched_t backend_sched,
|
|
|
492
501
|
int idata = 0;
|
|
493
502
|
for (; idata < idata_split; ++idata) {
|
|
494
503
|
const float idataf = idata;
|
|
504
|
+
ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
|
|
495
505
|
ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
|
|
496
|
-
|
|
506
|
+
ggml_opt_eval(cd.opt_ctx, cd.result);
|
|
497
507
|
ggml_backend_tensor_get(loss, loss_history.data() + idata, 0, sizeof(float));
|
|
498
508
|
}
|
|
499
509
|
for (; idata < ndata; ++idata) {
|
|
500
510
|
const float idataf = idata;
|
|
511
|
+
ggml_opt_alloc(cd.opt_ctx, /*backward =*/ false);
|
|
501
512
|
ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
|
|
502
|
-
|
|
513
|
+
ggml_opt_eval(cd.opt_ctx, cd.result2);
|
|
503
514
|
ggml_backend_tensor_get(loss, loss_history.data() + idata, 0, sizeof(float));
|
|
504
515
|
}
|
|
505
516
|
}
|
|
@@ -573,7 +584,6 @@ static std::pair<int, int> test_gradient_accumulation(
|
|
|
573
584
|
|
|
574
585
|
struct helper_ctx_data cd = helper_get_ctx_data(
|
|
575
586
|
backend_sched, backend, /*init_opt_ctx =*/ true, /*optimizer_defaults =*/ false, /*nbatch_logical =*/ 6, nbatch_physical, loss_type);
|
|
576
|
-
struct ggml_tensor * loss = ggml_opt_loss(cd.opt_ctx);
|
|
577
587
|
|
|
578
588
|
std::vector<float> grad_history(ndata);
|
|
579
589
|
for (int64_t idata = 0; idata < ndata; ++idata) {
|
|
@@ -584,15 +594,17 @@ static std::pair<int, int> test_gradient_accumulation(
|
|
|
584
594
|
if (nbatch_physical == 1) {
|
|
585
595
|
for (int idata = 0; idata < ndata; ++idata) {
|
|
586
596
|
const float idataf = idata;
|
|
597
|
+
ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
|
|
587
598
|
ggml_backend_tensor_set(cd.inputs, &idataf, 0, 1*sizeof(float));
|
|
588
|
-
|
|
599
|
+
ggml_opt_eval(cd.opt_ctx, cd.result);
|
|
589
600
|
ggml_backend_tensor_get(ggml_opt_grad_acc(cd.opt_ctx, cd.weights), grad_history.data() + idata, 0, 1*sizeof(float));
|
|
590
601
|
}
|
|
591
602
|
} else if (nbatch_physical == 2) {
|
|
592
603
|
for (int idata = 0; idata < ndata; idata += 2) {
|
|
593
604
|
const float idataf[2] = {float(idata + 0), float(idata + 1)};
|
|
605
|
+
ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
|
|
594
606
|
ggml_backend_tensor_set(cd.inputs, idataf, 0, 2*sizeof(float));
|
|
595
|
-
|
|
607
|
+
ggml_opt_eval(cd.opt_ctx, cd.result);
|
|
596
608
|
|
|
597
609
|
grad_history[idata + 0] = 0.0f;
|
|
598
610
|
ggml_backend_tensor_get(ggml_opt_grad_acc(cd.opt_ctx, cd.weights), grad_history.data() + idata + 1, 0, 1*sizeof(float));
|
|
@@ -617,7 +629,7 @@ static std::pair<int, int> test_gradient_accumulation(
|
|
|
617
629
|
}
|
|
618
630
|
subtest_ok = subtest_ok && almost_equal(grad_history[1], 2.0, atol);
|
|
619
631
|
subtest_ok = subtest_ok && almost_equal(grad_history[3], 4.0, atol);
|
|
620
|
-
subtest_ok = subtest_ok && almost_equal(grad_history[5],
|
|
632
|
+
subtest_ok = subtest_ok && almost_equal(grad_history[5], 6.0, atol);
|
|
621
633
|
} else if (loss_type == GGML_OPT_LOSS_TYPE_MEAN) {
|
|
622
634
|
if (nbatch_physical == 1) {
|
|
623
635
|
subtest_ok = subtest_ok && almost_equal(grad_history[0], 1.0/ndata, atol);
|
|
@@ -630,7 +642,7 @@ static std::pair<int, int> test_gradient_accumulation(
|
|
|
630
642
|
}
|
|
631
643
|
subtest_ok = subtest_ok && almost_equal(grad_history[1], 2.0/ndata, atol);
|
|
632
644
|
subtest_ok = subtest_ok && almost_equal(grad_history[3], 4.0/ndata, atol);
|
|
633
|
-
subtest_ok = subtest_ok && almost_equal(grad_history[5],
|
|
645
|
+
subtest_ok = subtest_ok && almost_equal(grad_history[5], 6.0/ndata, atol);
|
|
634
646
|
} else {
|
|
635
647
|
GGML_ASSERT(false);
|
|
636
648
|
}
|
|
@@ -692,7 +704,8 @@ static std::pair<int, int> test_regression(ggml_backend_sched_t backend_sched, g
|
|
|
692
704
|
std::mt19937 gen(12345);
|
|
693
705
|
std::normal_distribution<float> nd{0.0f, 0.1f};
|
|
694
706
|
|
|
695
|
-
ggml_opt_dataset_t dataset = ggml_opt_dataset_init(
|
|
707
|
+
ggml_opt_dataset_t dataset = ggml_opt_dataset_init(
|
|
708
|
+
GGML_TYPE_F32, GGML_TYPE_F32, 1, 1, ndata_regression, ndata_regression);
|
|
696
709
|
|
|
697
710
|
float * data = ggml_get_data_f32(ggml_opt_dataset_data( dataset));
|
|
698
711
|
float * labels = ggml_get_data_f32(ggml_opt_dataset_labels(dataset));
|
|
@@ -733,15 +746,14 @@ static std::pair<int, int> test_regression(ggml_backend_sched_t backend_sched, g
|
|
|
733
746
|
|
|
734
747
|
struct ggml_tensor * a = ggml_new_tensor_1d(ctx_static, GGML_TYPE_F32, 1);
|
|
735
748
|
ggml_set_name(a, "a");
|
|
736
|
-
ggml_set_param(
|
|
749
|
+
ggml_set_param(a);
|
|
737
750
|
|
|
738
751
|
struct ggml_tensor * b = ggml_new_tensor_1d(ctx_static, GGML_TYPE_F32, 1);
|
|
739
752
|
ggml_set_name(b, "b");
|
|
740
|
-
ggml_set_param(
|
|
753
|
+
ggml_set_param(b);
|
|
741
754
|
|
|
742
755
|
struct ggml_tensor * f = ggml_add(ctx_compute, ggml_mul(ctx_compute, x, a), b);
|
|
743
756
|
ggml_set_name(f, "f");
|
|
744
|
-
ggml_set_param(ctx_static, f);
|
|
745
757
|
|
|
746
758
|
ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx_static, backend);
|
|
747
759
|
const float a0 = 1.0f;
|
|
@@ -853,7 +865,7 @@ int main(void) {
|
|
|
853
865
|
backends_modded.insert(backends_modded.end(), backends.begin(), backends.end());
|
|
854
866
|
|
|
855
867
|
ggml_backend_sched_t backend_sched = ggml_backend_sched_new(
|
|
856
|
-
backends_modded.data(), nullptr, backends_modded.size(), GGML_DEFAULT_GRAPH_SIZE, false);
|
|
868
|
+
backends_modded.data(), nullptr, backends_modded.size(), GGML_DEFAULT_GRAPH_SIZE, false, true);
|
|
857
869
|
|
|
858
870
|
printf("Backend %zu/%zu: %s\n", i + 1, dev_count, ggml_backend_dev_name(devs[i]));
|
|
859
871
|
printf(" Device description: %s\n", ggml_backend_dev_description(devs[i]));
|
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
// Tests common_regex (esp. its partial final matches support).
|
|
2
|
+
|
|
3
|
+
#include "common.h"
|
|
4
|
+
#include "regex-partial.h"
|
|
5
|
+
|
|
6
|
+
#include <sstream>
|
|
7
|
+
#include <iostream>
|
|
8
|
+
#include <optional>
|
|
9
|
+
|
|
10
|
+
template <class T> static void assert_equals(const T & expected, const T & actual) {
|
|
11
|
+
if (expected != actual) {
|
|
12
|
+
std::cerr << "Expected: " << expected << std::endl;
|
|
13
|
+
std::cerr << " Actual: " << actual << std::endl;
|
|
14
|
+
std::cerr << std::flush;
|
|
15
|
+
throw std::runtime_error("Test failed");
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
struct test_case {
|
|
20
|
+
std::string pattern;
|
|
21
|
+
struct input_output {
|
|
22
|
+
std::string input;
|
|
23
|
+
common_regex_match output;
|
|
24
|
+
};
|
|
25
|
+
std::vector<input_output> inputs_outputs;
|
|
26
|
+
};
|
|
27
|
+
|
|
28
|
+
static std::string common_regex_match_type_name(common_regex_match_type type) {
|
|
29
|
+
switch (type) {
|
|
30
|
+
case COMMON_REGEX_MATCH_TYPE_NONE:
|
|
31
|
+
return "COMMON_REGEX_MATCH_TYPE_NONE";
|
|
32
|
+
case COMMON_REGEX_MATCH_TYPE_PARTIAL:
|
|
33
|
+
return "COMMON_REGEX_MATCH_TYPE_PARTIAL";
|
|
34
|
+
case COMMON_REGEX_MATCH_TYPE_FULL:
|
|
35
|
+
return "COMMON_REGEX_MATCH_TYPE_FULL";
|
|
36
|
+
}
|
|
37
|
+
return "?";
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
static void test_regex() {
|
|
41
|
+
printf("[%s]\n", __func__);
|
|
42
|
+
auto test = [](const test_case & test_case) {
|
|
43
|
+
common_regex cr(test_case.pattern);
|
|
44
|
+
std::cout << "Testing pattern: /" << test_case.pattern << "/\n";
|
|
45
|
+
// std::cout << " partial rev: " << cr.reversed_partial_pattern.str() << '\n';
|
|
46
|
+
for (const auto & input_output : test_case.inputs_outputs) {
|
|
47
|
+
std::cout << " Input: " << input_output.input << '\n';
|
|
48
|
+
auto m = cr.search(input_output.input, 0);
|
|
49
|
+
if (m != input_output.output) {
|
|
50
|
+
auto match_to_str = [&](const std::optional<common_regex_match> & m) {
|
|
51
|
+
std::ostringstream ss;
|
|
52
|
+
if (m->type == COMMON_REGEX_MATCH_TYPE_NONE) {
|
|
53
|
+
ss << "<no match>";
|
|
54
|
+
} else {
|
|
55
|
+
GGML_ASSERT(!input_output.output.groups.empty());
|
|
56
|
+
std::vector<std::string> parts;
|
|
57
|
+
for (const auto & g : m->groups) {
|
|
58
|
+
parts.push_back("{" + std::to_string(g.begin) + ", " + std::to_string(g.end) + "}");
|
|
59
|
+
}
|
|
60
|
+
ss << "{" << common_regex_match_type_name(m->type) << ", {" << string_join(parts, ", ") << "}}";
|
|
61
|
+
}
|
|
62
|
+
return ss.str();
|
|
63
|
+
};
|
|
64
|
+
std::cout << " Expected: " << match_to_str(input_output.output) << '\n';
|
|
65
|
+
std::cout << " Got: " << match_to_str(m) << '\n';
|
|
66
|
+
std::cout << " Inverted pattern: /" << regex_to_reversed_partial_regex(test_case.pattern) << "/\n";
|
|
67
|
+
|
|
68
|
+
throw std::runtime_error("Test failed");
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
};
|
|
72
|
+
test({
|
|
73
|
+
"a",
|
|
74
|
+
{
|
|
75
|
+
{"a", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 1}}}},
|
|
76
|
+
{"b", {COMMON_REGEX_MATCH_TYPE_NONE, {}}},
|
|
77
|
+
{"ab", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 1}}}},
|
|
78
|
+
{"ba", {COMMON_REGEX_MATCH_TYPE_FULL, {{1, 2}}}},
|
|
79
|
+
}
|
|
80
|
+
});
|
|
81
|
+
test({
|
|
82
|
+
"abcd",
|
|
83
|
+
{
|
|
84
|
+
{"abcd", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 4}}}},
|
|
85
|
+
{"abcde", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 4}}}},
|
|
86
|
+
{"abc", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 3}}}},
|
|
87
|
+
{"ab", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 2}}}},
|
|
88
|
+
{"a", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 1}}}},
|
|
89
|
+
{"d", {}},
|
|
90
|
+
{"bcd", {}},
|
|
91
|
+
{"cde", {}},
|
|
92
|
+
{"cd", {}},
|
|
93
|
+
{"yeah ab", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{5, 7}}}},
|
|
94
|
+
{"abbie", {}},
|
|
95
|
+
{"", {}},
|
|
96
|
+
}
|
|
97
|
+
});
|
|
98
|
+
test({
|
|
99
|
+
".*?ab",
|
|
100
|
+
{
|
|
101
|
+
{"ab", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 2}}}},
|
|
102
|
+
{"abc", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 2}}}},
|
|
103
|
+
{"dab", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 3}}}},
|
|
104
|
+
{"dabc", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 3}}}},
|
|
105
|
+
{"da", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 2}}}},
|
|
106
|
+
{"d", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 1}}}},
|
|
107
|
+
}
|
|
108
|
+
});
|
|
109
|
+
test({
|
|
110
|
+
"a.*?b",
|
|
111
|
+
{
|
|
112
|
+
{"ab", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 2}}}},
|
|
113
|
+
{"abc", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 2}}}},
|
|
114
|
+
{"a b", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 3}}}},
|
|
115
|
+
{"a", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 1}}}},
|
|
116
|
+
{"argh", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 4}}}},
|
|
117
|
+
{"d", {}},
|
|
118
|
+
{"b", {}},
|
|
119
|
+
}
|
|
120
|
+
});
|
|
121
|
+
test({
|
|
122
|
+
"ab(?:cd){2,4}ef",
|
|
123
|
+
{
|
|
124
|
+
// {"ab", {COMMON_REGEX_MATCH_TYPE_PARTIAL, 0, {}}},
|
|
125
|
+
{"ab", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 2}}}},
|
|
126
|
+
{"abcd", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 4}}}},
|
|
127
|
+
{"abcde", {}},
|
|
128
|
+
{"abcdef", {}},
|
|
129
|
+
{"abcdcd", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 6}}}},
|
|
130
|
+
{"abcdcde", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 7}}}},
|
|
131
|
+
{"abcdcdef", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 8}}}},
|
|
132
|
+
{"abcdcdcdcdef", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 12}}}},
|
|
133
|
+
{"abcdcdcdcdcdef", {}},
|
|
134
|
+
{"abcde", {}},
|
|
135
|
+
{"yea", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{2, 3}}}},
|
|
136
|
+
}
|
|
137
|
+
});
|
|
138
|
+
test({
|
|
139
|
+
"a(?:rte| pure )fact",
|
|
140
|
+
{
|
|
141
|
+
{"a", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 1}}}},
|
|
142
|
+
{"art", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 3}}}},
|
|
143
|
+
{"artefa", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 6}}}},
|
|
144
|
+
{"fact", {}},
|
|
145
|
+
{"an arte", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{3, 7}}}},
|
|
146
|
+
{"artefact", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 8}}}},
|
|
147
|
+
{"an artefact", {COMMON_REGEX_MATCH_TYPE_FULL, {{3, 11}}}},
|
|
148
|
+
{"a pure", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 6}}}},
|
|
149
|
+
{"a pure fact", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 11}}}},
|
|
150
|
+
{"it's a pure fact", {COMMON_REGEX_MATCH_TYPE_FULL, {{5, 16}}}},
|
|
151
|
+
{"" , {}},
|
|
152
|
+
{"pure", {}},
|
|
153
|
+
{"pure fact", {}},
|
|
154
|
+
}
|
|
155
|
+
});
|
|
156
|
+
test({
|
|
157
|
+
"abc",
|
|
158
|
+
{
|
|
159
|
+
{" abcc", {COMMON_REGEX_MATCH_TYPE_FULL, {{1, 4}}}},
|
|
160
|
+
{"ab", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 2}}}},
|
|
161
|
+
{"abc", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 3}}}},
|
|
162
|
+
{" ab", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{1, 3}}}},
|
|
163
|
+
{"a", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 1}}}},
|
|
164
|
+
{"b", {}},
|
|
165
|
+
{"c", {}},
|
|
166
|
+
{"", {}},
|
|
167
|
+
}
|
|
168
|
+
});
|
|
169
|
+
|
|
170
|
+
test({
|
|
171
|
+
"(?:abc)?\\s*def",
|
|
172
|
+
{
|
|
173
|
+
{"ab", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 2}}}},
|
|
174
|
+
{"abc", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 3}}}},
|
|
175
|
+
{"abc ", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 4}}}},
|
|
176
|
+
{"abc d", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 5}}}},
|
|
177
|
+
{"abc de", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 6}}}},
|
|
178
|
+
{"abc def", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 7}}}},
|
|
179
|
+
{"abc defg", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 7}}}},
|
|
180
|
+
{"abc defgh", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 7}}}},
|
|
181
|
+
{"abcde", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 5}}}},
|
|
182
|
+
{"abcdefgh", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 6}}}},
|
|
183
|
+
{" d", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 2}}}},
|
|
184
|
+
{"def", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 3}}}},
|
|
185
|
+
}
|
|
186
|
+
});
|
|
187
|
+
|
|
188
|
+
test({
|
|
189
|
+
"a+b",
|
|
190
|
+
{
|
|
191
|
+
{"aaab", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 4}}}},
|
|
192
|
+
{"aaa", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 3}}}},
|
|
193
|
+
{"ab", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 2}}}},
|
|
194
|
+
}
|
|
195
|
+
});
|
|
196
|
+
|
|
197
|
+
test({
|
|
198
|
+
"(?:"
|
|
199
|
+
"(```(?:xml|json)?\\n\\s*)?" // match 1 (block_start)
|
|
200
|
+
"(" // match 2 (open_tag)
|
|
201
|
+
"<tool_call>"
|
|
202
|
+
"|<function_call>"
|
|
203
|
+
"|<tool>"
|
|
204
|
+
"|<tools>"
|
|
205
|
+
"|<response>"
|
|
206
|
+
"|<json>"
|
|
207
|
+
"|<xml>"
|
|
208
|
+
"|<JSON>"
|
|
209
|
+
")?"
|
|
210
|
+
"(\\s*\\{\\s*\"name\"\\s*:)" // match 3 (named tool call)
|
|
211
|
+
")"
|
|
212
|
+
"|<function=([^>]+)>" // match 4 (function name)
|
|
213
|
+
"|<function name=\"([^\"]+)\">", // match 5 (function name again)
|
|
214
|
+
{
|
|
215
|
+
{"{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 8}, {54, 54}, {54, 54}, {0, 8}, {54, 54}, {54, 54}}}},
|
|
216
|
+
{"<tool_call> {\"name", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 18}}}},
|
|
217
|
+
{"<tool_call>{\"name", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 17}}}},
|
|
218
|
+
{"Let's call something\n<tool_call>{\"name", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{21, 38}}}},
|
|
219
|
+
{"Ok then<tool_call>{\"name", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{7, 24}}}},
|
|
220
|
+
{"{\"name", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 6}}}},
|
|
221
|
+
{"Ok then{\"name", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{7, 13}}}},
|
|
222
|
+
{"<tool_call> {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 20}, {66, 66}, {0, 11}, {11, 20}, {66, 66}, {66, 66}}}},
|
|
223
|
+
{"<function_call> {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 24}, {70, 70}, {0, 15}, {15, 24}, {70, 70}, {70, 70}}}},
|
|
224
|
+
{"<function name=\"special_function\"> {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 34}, {89, 89}, {89, 89}, {89, 89}, {89, 89}, {16, 32}}}},
|
|
225
|
+
{"<function=all>", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 14}, {14, 14}, {14, 14}, {14, 14}, {10, 13}, {14, 14}}}},
|
|
226
|
+
|
|
227
|
+
}
|
|
228
|
+
});
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
static void test_regex_to_reversed_partial_regex() {
|
|
232
|
+
printf("[%s]\n", __func__);
|
|
233
|
+
|
|
234
|
+
assert_equals<std::string>(
|
|
235
|
+
"((?:(?:c)?b)?a)[\\s\\S]*",
|
|
236
|
+
regex_to_reversed_partial_regex("abc"));
|
|
237
|
+
|
|
238
|
+
assert_equals<std::string>(
|
|
239
|
+
"(a+)[\\s\\S]*",
|
|
240
|
+
regex_to_reversed_partial_regex("a+"));
|
|
241
|
+
|
|
242
|
+
assert_equals<std::string>(
|
|
243
|
+
"(a*)[\\s\\S]*",
|
|
244
|
+
regex_to_reversed_partial_regex("a*"));
|
|
245
|
+
|
|
246
|
+
assert_equals<std::string>(
|
|
247
|
+
"(a?)[\\s\\S]*",
|
|
248
|
+
regex_to_reversed_partial_regex("a?"));
|
|
249
|
+
|
|
250
|
+
assert_equals<std::string>(
|
|
251
|
+
"([a-z])[\\s\\S]*",
|
|
252
|
+
regex_to_reversed_partial_regex("[a-z]"));
|
|
253
|
+
|
|
254
|
+
assert_equals<std::string>(
|
|
255
|
+
"((?:\\w+)?[a-z])[\\s\\S]*",
|
|
256
|
+
regex_to_reversed_partial_regex("[a-z]\\w+"));
|
|
257
|
+
|
|
258
|
+
assert_equals<std::string>(
|
|
259
|
+
"((?:a|b))[\\s\\S]*",
|
|
260
|
+
regex_to_reversed_partial_regex("(?:a|b)"));
|
|
261
|
+
assert_equals<std::string>(
|
|
262
|
+
"((?:(?:(?:d)?c)?b)?a)[\\s\\S]*",
|
|
263
|
+
regex_to_reversed_partial_regex("abcd"));
|
|
264
|
+
assert_equals<std::string>(
|
|
265
|
+
"((?:b)?a*)[\\s\\S]*", // TODO: ((?:b)?a*+).* ??
|
|
266
|
+
regex_to_reversed_partial_regex("a*b"));
|
|
267
|
+
assert_equals<std::string>(
|
|
268
|
+
"((?:(?:b)?a)?.*)[\\s\\S]*",
|
|
269
|
+
regex_to_reversed_partial_regex(".*?ab"));
|
|
270
|
+
assert_equals<std::string>(
|
|
271
|
+
"((?:(?:b)?.*)?a)[\\s\\S]*",
|
|
272
|
+
regex_to_reversed_partial_regex("a.*?b"));
|
|
273
|
+
assert_equals<std::string>(
|
|
274
|
+
"((?:(?:d)?(?:(?:c)?b))?a)[\\s\\S]*",
|
|
275
|
+
regex_to_reversed_partial_regex("a(bc)d"));
|
|
276
|
+
assert_equals<std::string>(
|
|
277
|
+
"((?:(?:(?:c)?b|(?:e)?d))?a)[\\s\\S]*",
|
|
278
|
+
regex_to_reversed_partial_regex("a(bc|de)"));
|
|
279
|
+
assert_equals<std::string>(
|
|
280
|
+
"((?:(?:(?:(?:(?:c)?b?)?b?)?b)?b)?a)[\\s\\S]*",
|
|
281
|
+
regex_to_reversed_partial_regex("ab{2,4}c"));
|
|
282
|
+
}
|
|
283
|
+
|
|
284
|
+
int main() {
|
|
285
|
+
test_regex_to_reversed_partial_regex();
|
|
286
|
+
test_regex();
|
|
287
|
+
std::cout << "All tests passed.\n";
|
|
288
|
+
}
|
|
@@ -360,7 +360,7 @@ int main(void) {
|
|
|
360
360
|
test_dry({0.2f, 0.2f, 0.2f, 0.2f, 0.2f}, {0, 1, 2, 3, 4, 0, 1}, {0.2f, 0.2f, 0.2f, 0.2f, 0.2f}, 1.0f, 1.1f, 4, 7, {});
|
|
361
361
|
|
|
362
362
|
test_top_n_sigma({0.1f, 0.2f, 0.3f, 0.4f}, {0.571429f, 0.428571f, 0.0f, 0.0f}, 1.00f);
|
|
363
|
-
test_top_n_sigma({0.1f, 0.2f, 0.3f, 0.4f}, {
|
|
363
|
+
test_top_n_sigma({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f, 0.3f, 0.2f, 0.1f}, 0.00f); // top_n_sigma == 0 now represents a no-op rather than greedy decoding as of PR#13345
|
|
364
364
|
test_top_n_sigma({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f, 0.3f, 0.2f, 0.1f}, 3.00f);
|
|
365
365
|
|
|
366
366
|
test_sampler_queue(10000, "k", 10000, 1.0f, 1.0f);
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# dependencies
|
|
2
|
+
|
|
3
|
+
find_package(Threads REQUIRED)
|
|
4
|
+
|
|
5
|
+
# third-party
|
|
6
|
+
|
|
7
|
+
# ...
|
|
8
|
+
|
|
9
|
+
# flags
|
|
10
|
+
|
|
11
|
+
llama_add_compile_flags()
|
|
12
|
+
|
|
13
|
+
# tools
|
|
14
|
+
|
|
15
|
+
if (EMSCRIPTEN)
|
|
16
|
+
else()
|
|
17
|
+
add_subdirectory(batched-bench)
|
|
18
|
+
add_subdirectory(gguf-split)
|
|
19
|
+
add_subdirectory(imatrix)
|
|
20
|
+
add_subdirectory(llama-bench)
|
|
21
|
+
add_subdirectory(main)
|
|
22
|
+
add_subdirectory(perplexity)
|
|
23
|
+
add_subdirectory(quantize)
|
|
24
|
+
if (LLAMA_BUILD_SERVER)
|
|
25
|
+
add_subdirectory(server)
|
|
26
|
+
endif()
|
|
27
|
+
add_subdirectory(run)
|
|
28
|
+
add_subdirectory(tokenize)
|
|
29
|
+
add_subdirectory(tts)
|
|
30
|
+
add_subdirectory(mtmd)
|
|
31
|
+
if (GGML_RPC)
|
|
32
|
+
add_subdirectory(rpc)
|
|
33
|
+
endif()
|
|
34
|
+
if (NOT GGML_BACKEND_DL)
|
|
35
|
+
# these examples use the backends directly and cannot be built with dynamic loading
|
|
36
|
+
add_subdirectory(cvector-generator)
|
|
37
|
+
add_subdirectory(export-lora)
|
|
38
|
+
endif()
|
|
39
|
+
endif()
|
|
@@ -123,8 +123,8 @@ int main(int argc, char ** argv) {
|
|
|
123
123
|
|
|
124
124
|
common_batch_clear(batch);
|
|
125
125
|
|
|
126
|
-
for (int
|
|
127
|
-
for (int
|
|
126
|
+
for (int j = 0; j < (is_pp_shared ? 1 : pl); ++j) {
|
|
127
|
+
for (int i = 0; i < pp; ++i) {
|
|
128
128
|
common_batch_add(batch, 0, i, { j }, false);
|
|
129
129
|
}
|
|
130
130
|
}
|
|
@@ -24,7 +24,8 @@ static void print_usage(int, char ** argv) {
|
|
|
24
24
|
LOG("\n %s \\\n"
|
|
25
25
|
" -m model.gguf -f some-text.txt [-o imatrix.dat] [--process-output] \\\n"
|
|
26
26
|
" [--no-ppl] [--chunk 123] [--output-frequency 10] [--save-frequency 0] \\\n"
|
|
27
|
-
" [--in-file imatrix-prev-0.dat --in-file imatrix-prev-1.dat ...]
|
|
27
|
+
" [--in-file imatrix-prev-0.dat --in-file imatrix-prev-1.dat ...] \\\n"
|
|
28
|
+
" [--parse-special]\n" , argv[0]);
|
|
28
29
|
LOG("\n");
|
|
29
30
|
}
|
|
30
31
|
|
|
@@ -46,7 +47,7 @@ private:
|
|
|
46
47
|
common_params m_params;
|
|
47
48
|
std::mutex m_mutex;
|
|
48
49
|
int m_last_call = 0;
|
|
49
|
-
std::vector<
|
|
50
|
+
std::vector<char> m_src1_data;
|
|
50
51
|
std::vector<char> m_ids; // the expert ids from ggml_mul_mat_id
|
|
51
52
|
};
|
|
52
53
|
|
|
@@ -93,11 +94,13 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
|
|
|
93
94
|
const bool is_host = ggml_backend_buffer_is_host(src1->buffer);
|
|
94
95
|
|
|
95
96
|
if (!is_host) {
|
|
96
|
-
|
|
97
|
-
|
|
97
|
+
const size_t src1_nbytes = ggml_nbytes(src1);
|
|
98
|
+
m_src1_data.resize(src1_nbytes);
|
|
99
|
+
ggml_backend_tensor_get(src1, m_src1_data.data(), 0, src1_nbytes);
|
|
98
100
|
}
|
|
99
101
|
|
|
100
|
-
const
|
|
102
|
+
const char * data = is_host ? (const char *) src1->data : m_src1_data.data();
|
|
103
|
+
GGML_ASSERT(src1->nb[0] == ggml_element_size(src1));
|
|
101
104
|
|
|
102
105
|
// this has been adapted to the new format of storing merged experts in a single 3d tensor
|
|
103
106
|
// ref: https://github.com/ggml-org/llama.cpp/pull/6387
|
|
@@ -144,7 +147,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
|
|
|
144
147
|
|
|
145
148
|
const int64_t i11 = idx % src1->ne[1];
|
|
146
149
|
const int64_t i12 = row;
|
|
147
|
-
const float * x = (const float *)(
|
|
150
|
+
const float * x = (const float *)(data + i11*src1->nb[1] + i12*src1->nb[2]);
|
|
148
151
|
|
|
149
152
|
for (int j = 0; j < (int)src1->ne[0]; ++j) {
|
|
150
153
|
e.values[e_start + j] += x[j]*x[j];
|
|
@@ -180,7 +183,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
|
|
|
180
183
|
++e.ncall;
|
|
181
184
|
LOG_DBGV(2, "%s[%d]: %32s, %s, %5d x %5d, %d\n", __func__, m_last_call, wname.c_str(), ggml_op_name(t->op), (int)src1->ne[0], (int)src1->ne[1], (int)src1->type);
|
|
182
185
|
for (int row = 0; row < (int)src1->ne[1]; ++row) {
|
|
183
|
-
const float * x = data + row * src1->
|
|
186
|
+
const float * x = (const float *) (data + row * src1->nb[1]);
|
|
184
187
|
for (int j = 0; j < (int)src1->ne[0]; ++j) {
|
|
185
188
|
e.values[j] += x[j]*x[j];
|
|
186
189
|
e.counts[j]++;
|
|
@@ -437,7 +440,7 @@ static bool compute_imatrix(llama_context * ctx, const common_params & params) {
|
|
|
437
440
|
auto tim1 = std::chrono::high_resolution_clock::now();
|
|
438
441
|
LOG_INF("%s: tokenizing the input ..\n", __func__);
|
|
439
442
|
|
|
440
|
-
std::vector<llama_token> tokens = common_tokenize(ctx, params.prompt, true);
|
|
443
|
+
std::vector<llama_token> tokens = common_tokenize(ctx, params.prompt, true, params.parse_special);
|
|
441
444
|
|
|
442
445
|
auto tim2 = std::chrono::high_resolution_clock::now();
|
|
443
446
|
LOG_INF("%s: tokenization took %g ms\n",__func__,1e-3*std::chrono::duration_cast<std::chrono::microseconds>(tim2-tim1).count());
|
|
@@ -583,7 +586,6 @@ int main(int argc, char ** argv) {
|
|
|
583
586
|
params.out_file = "imatrix.dat" ;
|
|
584
587
|
|
|
585
588
|
params.n_ctx = 512;
|
|
586
|
-
params.logits_all = true;
|
|
587
589
|
params.escape = false;
|
|
588
590
|
|
|
589
591
|
if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_IMATRIX, print_usage)) {
|