@fugood/llama.node 0.3.17 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193)
  1. package/CMakeLists.txt +3 -1
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  7. package/bin/linux-cuda/x64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  9. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  10. package/bin/win32/arm64/llama-node.node +0 -0
  11. package/bin/win32/arm64/node.lib +0 -0
  12. package/bin/win32/x64/llama-node.node +0 -0
  13. package/bin/win32/x64/node.lib +0 -0
  14. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/arm64/node.lib +0 -0
  16. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  17. package/bin/win32-vulkan/x64/node.lib +0 -0
  18. package/lib/binding.ts +39 -2
  19. package/lib/index.js +132 -1
  20. package/lib/index.ts +203 -3
  21. package/package.json +2 -1
  22. package/src/EmbeddingWorker.cpp +1 -1
  23. package/src/LlamaCompletionWorker.cpp +366 -19
  24. package/src/LlamaCompletionWorker.h +30 -10
  25. package/src/LlamaContext.cpp +213 -5
  26. package/src/LlamaContext.h +12 -0
  27. package/src/common.hpp +15 -0
  28. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +133 -24
  29. package/src/llama.cpp/.github/workflows/build.yml +41 -762
  30. package/src/llama.cpp/.github/workflows/docker.yml +5 -2
  31. package/src/llama.cpp/.github/workflows/release.yml +716 -0
  32. package/src/llama.cpp/.github/workflows/server.yml +12 -12
  33. package/src/llama.cpp/CMakeLists.txt +5 -17
  34. package/src/llama.cpp/cmake/build-info.cmake +8 -2
  35. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
  36. package/src/llama.cpp/common/CMakeLists.txt +31 -3
  37. package/src/llama.cpp/common/arg.cpp +48 -29
  38. package/src/llama.cpp/common/chat.cpp +128 -106
  39. package/src/llama.cpp/common/chat.h +2 -0
  40. package/src/llama.cpp/common/common.cpp +37 -1
  41. package/src/llama.cpp/common/common.h +18 -9
  42. package/src/llama.cpp/common/llguidance.cpp +1 -0
  43. package/src/llama.cpp/common/minja/chat-template.hpp +9 -5
  44. package/src/llama.cpp/common/minja/minja.hpp +69 -36
  45. package/src/llama.cpp/common/regex-partial.cpp +204 -0
  46. package/src/llama.cpp/common/regex-partial.h +56 -0
  47. package/src/llama.cpp/common/sampling.cpp +57 -50
  48. package/src/llama.cpp/examples/CMakeLists.txt +2 -23
  49. package/src/llama.cpp/examples/embedding/embedding.cpp +2 -11
  50. package/src/llama.cpp/examples/parallel/parallel.cpp +86 -14
  51. package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
  52. package/src/llama.cpp/examples/training/finetune.cpp +96 -0
  53. package/src/llama.cpp/ggml/CMakeLists.txt +27 -0
  54. package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
  55. package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
  56. package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
  57. package/src/llama.cpp/ggml/include/ggml.h +10 -7
  58. package/src/llama.cpp/ggml/src/CMakeLists.txt +1 -1
  59. package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
  60. package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
  61. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +20 -13
  62. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +0 -2
  63. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +306 -6
  64. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +4 -13
  65. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +29 -16
  66. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +88 -5
  67. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -12
  68. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +264 -69
  69. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +501 -0
  70. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +0 -13
  71. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +0 -6
  72. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
  73. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +36 -11
  74. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +0 -2
  75. package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
  76. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
  77. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +41 -27
  78. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +29 -23
  79. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +9 -8
  80. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +121 -232
  81. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +7 -15
  82. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
  83. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
  84. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
  85. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
  86. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +0 -23
  87. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
  88. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +338 -166
  89. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
  90. package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
  91. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
  92. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +81 -70
  93. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +657 -193
  94. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +20 -0
  95. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +123 -29
  96. package/src/llama.cpp/ggml/src/ggml.c +29 -20
  97. package/src/llama.cpp/ggml/src/gguf.cpp +33 -33
  98. package/src/llama.cpp/include/llama.h +52 -11
  99. package/src/llama.cpp/requirements/requirements-all.txt +3 -3
  100. package/src/llama.cpp/scripts/xxd.cmake +1 -1
  101. package/src/llama.cpp/src/CMakeLists.txt +1 -0
  102. package/src/llama.cpp/src/llama-adapter.cpp +6 -0
  103. package/src/llama.cpp/src/llama-arch.cpp +3 -0
  104. package/src/llama.cpp/src/llama-batch.cpp +5 -1
  105. package/src/llama.cpp/src/llama-batch.h +2 -1
  106. package/src/llama.cpp/src/llama-chat.cpp +17 -7
  107. package/src/llama.cpp/src/llama-chat.h +1 -0
  108. package/src/llama.cpp/src/llama-context.cpp +389 -501
  109. package/src/llama.cpp/src/llama-context.h +44 -32
  110. package/src/llama.cpp/src/llama-cparams.h +1 -0
  111. package/src/llama.cpp/src/llama-graph.cpp +20 -38
  112. package/src/llama.cpp/src/llama-graph.h +12 -8
  113. package/src/llama.cpp/src/llama-kv-cache.cpp +1503 -389
  114. package/src/llama.cpp/src/llama-kv-cache.h +271 -85
  115. package/src/llama.cpp/src/llama-memory.h +11 -1
  116. package/src/llama.cpp/src/llama-model-loader.cpp +24 -15
  117. package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
  118. package/src/llama.cpp/src/llama-model-saver.h +37 -0
  119. package/src/llama.cpp/src/llama-model.cpp +316 -69
  120. package/src/llama.cpp/src/llama-model.h +8 -1
  121. package/src/llama.cpp/src/llama-quant.cpp +15 -13
  122. package/src/llama.cpp/src/llama-sampling.cpp +18 -6
  123. package/src/llama.cpp/src/llama-vocab.cpp +42 -4
  124. package/src/llama.cpp/src/llama-vocab.h +6 -0
  125. package/src/llama.cpp/src/llama.cpp +14 -0
  126. package/src/llama.cpp/tests/CMakeLists.txt +10 -2
  127. package/src/llama.cpp/tests/test-backend-ops.cpp +107 -47
  128. package/src/llama.cpp/tests/test-chat-template.cpp +10 -11
  129. package/src/llama.cpp/tests/test-chat.cpp +3 -1
  130. package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
  131. package/src/llama.cpp/tests/test-opt.cpp +33 -21
  132. package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
  133. package/src/llama.cpp/tests/test-sampling.cpp +1 -1
  134. package/src/llama.cpp/tools/CMakeLists.txt +39 -0
  135. package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +2 -2
  136. package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
  137. package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +495 -348
  138. package/src/llama.cpp/{examples → tools}/main/main.cpp +6 -9
  139. package/src/llama.cpp/{examples/llava → tools/mtmd}/CMakeLists.txt +1 -35
  140. package/src/llama.cpp/{examples/llava → tools/mtmd}/clip-impl.h +25 -5
  141. package/src/llama.cpp/{examples/llava → tools/mtmd}/clip.cpp +1440 -1349
  142. package/src/llama.cpp/tools/mtmd/clip.h +99 -0
  143. package/src/llama.cpp/{examples/llava → tools/mtmd}/mtmd-cli.cpp +70 -44
  144. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
  145. package/src/llama.cpp/{examples/llava → tools/mtmd}/mtmd.cpp +251 -281
  146. package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
  147. package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +4 -2
  148. package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +13 -76
  149. package/src/llama.cpp/{examples → tools}/rpc/rpc-server.cpp +70 -74
  150. package/src/llama.cpp/{examples → tools}/run/run.cpp +18 -4
  151. package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
  152. package/src/llama.cpp/{examples → tools}/server/server.cpp +291 -76
  153. package/src/llama.cpp/{examples → tools}/server/utils.hpp +377 -5
  154. package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
  155. package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
  156. package/src/llama.cpp/examples/infill/infill.cpp +0 -590
  157. package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
  158. package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
  159. package/src/llama.cpp/examples/llava/clip.h +0 -135
  160. package/src/llama.cpp/examples/llava/llava.cpp +0 -586
  161. package/src/llama.cpp/examples/llava/llava.h +0 -49
  162. package/src/llama.cpp/examples/llava/mtmd.h +0 -168
  163. package/src/llama.cpp/examples/llava/qwen2vl-test.cpp +0 -636
  164. /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
  165. /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
  166. /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
  167. /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
  168. /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
  169. /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
  170. /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
  171. /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
  172. /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
  173. /package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +0 -0
  174. /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
  175. /package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +0 -0
  176. /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
  177. /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
  178. /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
  179. /package/src/llama.cpp/{examples/llava → tools/mtmd}/deprecation-warning.cpp +0 -0
  180. /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
  181. /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
  182. /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
  183. /package/src/llama.cpp/{examples → tools}/rpc/CMakeLists.txt +0 -0
  184. /package/src/llama.cpp/{examples → tools}/run/CMakeLists.txt +0 -0
  185. /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
  186. /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
  187. /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
  188. /package/src/llama.cpp/{examples → tools}/server/httplib.h +0 -0
  189. /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
  190. /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
  191. /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
  192. /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
  193. /package/src/llama.cpp/{examples → tools}/tts/tts.cpp +0 -0

package/src/llama.cpp/tests/test-backend-ops.cpp

@@ -823,7 +823,7 @@ struct test_case {
 
         ggml_build_forward_expand(gf, out);
         ggml_graph_cpy(gf, gb);
-        ggml_build_backward_expand(ctx.get(), ctx.get(), gb, false);
+        ggml_build_backward_expand(ctx.get(), gb, nullptr);
         if (expect.size() != 1 || expect[0] != 0.0f) {
             GGML_ASSERT(ggml_graph_n_nodes(gb) > ggml_graph_n_nodes(gf));
             for (ggml_tensor * t = ggml_get_first_tensor(ctx.get()); t != NULL; t = ggml_get_next_tensor(ctx.get(), t)) {
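
Note: this hunk tracks an upstream ggml API change. ggml_build_backward_expand no longer takes a separate gradient context or a keep-gradients flag; judging from the call above, it now takes the context, the backward graph, and an optional array of gradient-accumulator tensors. A minimal sketch of the new call pattern (illustrative only; the grad_accs interpretation is an assumption, not taken from this diff):

    // assumed new signature: ggml_build_backward_expand(ggml_context *, ggml_cgraph *, ggml_tensor ** grad_accs)
    ggml_build_forward_expand(gf, out);           // record the forward graph that ends in `out`
    ggml_graph_cpy(gf, gb);                       // the backward graph starts as a copy of the forward graph
    ggml_build_backward_expand(ctx, gb, nullptr); // nullptr: no explicit gradient-accumulator tensors (assumption)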
@@ -1026,7 +1026,7 @@ struct test_example : public test_case {
         // Step 3: return the output tensor.
         return out;
     }
-    // In order to also check the gradients for your op, add calls like ggml_set_param(ctx, a)
+    // In order to also check the gradients for your op, add calls like ggml_set_param(a)
     // immediately after you create the tensors.
     // This is optional and only makes sense if a backward pass has actually been implemented for the new op.
 };
@@ -1058,7 +1058,7 @@ struct test_unary : public test_case {
             auto ne = ne_a; ne[0] *= 3;
             a = ggml_new_tensor(ctx, type, 4, ne.data());
             if (grad_supported) {
-                ggml_set_param(ctx, a);
+                ggml_set_param(a);
             }
             ggml_set_name(a, "a");
 
@@ -1067,7 +1067,7 @@ struct test_unary : public test_case {
         } else {
             a = ggml_new_tensor(ctx, type, 4, ne_a.data());
             if (grad_supported) {
-                ggml_set_param(ctx, a);
+                ggml_set_param(a);
             }
             ggml_set_name(a, "a");
         }
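
Most of the remaining hunks in this file are the same mechanical update: ggml_set_param now takes only the tensor, without the ggml_context. A minimal before/after sketch based on the calls in these hunks (variable names are illustrative):

    ggml_tensor * a = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne.data());
    // before: ggml_set_param(ctx, a);
    ggml_set_param(a);       // after: mark `a` as a parameter so the backward pass produces gradients for it
    ggml_set_name(a, "a");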
@@ -1133,7 +1133,7 @@ struct test_get_rows : public test_case {
 
         const bool grad_supported = ggml_is_matrix(in) && ggml_is_vector(rows);
         if (grad_supported) {
-            ggml_set_param(ctx, in);
+            ggml_set_param(in);
             // rows is a constant input -> no gradients
         }
 
@@ -1322,7 +1322,7 @@ struct test_repeat : public test_case {
         ggml_set_name(target, "target");
 
         ggml_tensor * src = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, src);
+        ggml_set_param(src);
         ggml_set_name(src, "src");
 
         ggml_tensor * out = ggml_repeat(ctx, src, target);
@@ -1406,7 +1406,7 @@ struct test_dup : public test_case {
 
    ggml_tensor * build_graph(ggml_context * ctx) override {
        ggml_tensor * src = ggml_new_tensor(ctx, type, 4, ne.data());
-       ggml_set_param(ctx, src);
+       ggml_set_param(src);
        ggml_set_name(src, "src");
 
        if (_use_permute) {
@@ -1442,7 +1442,7 @@ struct test_set : public test_case {
 
    ggml_tensor * build_graph(ggml_context * ctx) override {
        ggml_tensor * src = ggml_new_tensor(ctx, type_src, 4, ne.data());
-       ggml_set_param(ctx, src);
+       ggml_set_param(src);
        ggml_set_name(src, "src");
 
        auto ne_dst = ne;
@@ -1450,7 +1450,7 @@ struct test_set : public test_case {
            ne_dst[i] *= 2;
        }
        ggml_tensor* dst = ggml_new_tensor(ctx, type_dst, 4, ne_dst.data());
-       ggml_set_param(ctx, dst);
+       ggml_set_param(dst);
        ggml_set_name(dst, "dst");
 
        size_t offset = 0;
@@ -1498,7 +1498,7 @@ struct test_cpy : public test_case {
 
    ggml_tensor * build_graph(ggml_context * ctx) override {
        ggml_tensor * src = ggml_new_tensor(ctx, type_src, 4, ne.data());
-       ggml_set_param(ctx, src);
+       ggml_set_param(src);
        ggml_set_name(src, "src");
 
        if (_src_use_permute) {
@@ -1536,7 +1536,7 @@ struct test_cont : public test_case {
 
    ggml_tensor * build_graph(ggml_context * ctx) override {
        ggml_tensor * src = ggml_new_tensor(ctx, type, 4, ne.data());
-       ggml_set_param(ctx, src);
+       ggml_set_param(src);
        ggml_set_name(src, "src");
 
        src = ggml_transpose(ctx, src);
@@ -1583,8 +1583,8 @@ struct test_bin_bcast : public test_case {
        // The backward pass supports broadcasting only for GGML_ADD:
        const bool grad_supported = op == ggml_add || ggml_are_same_shape(a, b);
        if (grad_supported) {
-           ggml_set_param(ctx, a);
-           ggml_set_param(ctx, b);
+           ggml_set_param(a);
+           ggml_set_param(b);
        }
 
        ggml_tensor * out = op(ctx, a, b);
@@ -1632,11 +1632,11 @@ struct test_add1 : public test_case {
 
    ggml_tensor * build_graph(ggml_context * ctx) override {
        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-       ggml_set_param(ctx, a);
+       ggml_set_param(a);
        ggml_set_name(a, "a");
 
        ggml_tensor * b = ggml_new_tensor_1d(ctx, type, 1);
-       // ggml_set_param(ctx, b); // TODO: implement
+       // ggml_set_param(b); // TODO: implement
        ggml_set_name(b, "b");
 
        ggml_tensor * out = ggml_add1(ctx, a, b);
@@ -1667,7 +1667,7 @@ struct test_scale : public test_case {
 
    ggml_tensor * build_graph(ggml_context * ctx) override {
        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-       ggml_set_param(ctx, a);
+       ggml_set_param(a);
        ggml_set_name(a, "a");
 
        ggml_tensor * out = ggml_scale(ctx, a, scale);
@@ -1762,7 +1762,7 @@ struct test_rms_norm : public test_case {
 
    ggml_tensor * build_graph(ggml_context * ctx) override {
        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-       ggml_set_param(ctx, a);
+       ggml_set_param(a);
        ggml_set_name(a, "a");
 
        if (v) {
@@ -1981,7 +1981,7 @@ struct test_mul_mat : public test_case {
    const std::array<int64_t, 2> bs;  // dims 3 and 4
    const std::array<int64_t, 2> nr;  // repeat in dims 3 and 4
    const std::array<int64_t, 4> per; // permutation of dimensions
-   const bool v; // whether a is a non-contiguous view
+   const bool v; // whether a and b are non-contiguous views
 
    std::string vars() override {
        return VARS_TO_STR9(type_a, type_b, m, n, k, bs, nr, per, v);
@@ -2028,9 +2028,9 @@ struct test_mul_mat : public test_case {
            b = ggml_new_tensor_4d(ctx, type_b, ne_b[per[0]], ne_b[per[1]], ne_b[per[2]], ne_b[per[3]]);
            if (!ggml_is_quantized(type_a)) {
                if (bs[1] == 1 && nr[1] == 1) {
-                   ggml_set_param(ctx, a);
+                   ggml_set_param(a);
                }
-               ggml_set_param(ctx, b);
+               ggml_set_param(b);
            }
            ggml_set_name(a, "a");
            ggml_set_name(b, "b");
@@ -2040,19 +2040,29 @@ struct test_mul_mat : public test_case {
            ggml_set_name(a, "a_permuted");
            ggml_set_name(b, "b_permuted");
        } else {
-
            if (v) {
-               a = ggml_new_tensor_4d(ctx, type_a, k*2, m, bs[0], bs[1]);
-               a = ggml_view_4d(ctx, a, k, m, bs[0], bs[1], a->nb[1], a->nb[2], a->nb[3], 0);
+               a = ggml_new_tensor_4d(ctx, type_a, k*2, m, bs[0], bs[1]);
+               b = ggml_new_tensor_4d(ctx, type_b, k*2, n, bs[0]*nr[0], bs[1]*nr[1]);
+
+               if (!ggml_is_quantized(type_a)) {
+                   if (bs[1] == 1 && nr[1] == 1) {
+                       ggml_set_param(a);
+                   }
+                   ggml_set_param(b);
+               }
+
+               a = ggml_view_4d(ctx, a, k, m, bs[0], bs[1], a->nb[1], a->nb[2], a->nb[3], 0);
+               b = ggml_view_4d(ctx, b, k, n, bs[0]*nr[0], bs[1]*nr[1], b->nb[1], b->nb[2], b->nb[3], 0);
            } else {
                a = ggml_new_tensor_4d(ctx, type_a, k, m, bs[0], bs[1]);
-           }
-           b = ggml_new_tensor_4d(ctx, type_b, k, n, bs[0]*nr[0], bs[1]*nr[1]);
-           if (!ggml_is_quantized(type_a)) {
-               if (bs[1] == 1 && nr[1] == 1) {
-                   ggml_set_param(ctx, a);
+               b = ggml_new_tensor_4d(ctx, type_b, k, n, bs[0]*nr[0], bs[1]*nr[1]);
+
+               if (!ggml_is_quantized(type_a)) {
+                   if (bs[1] == 1 && nr[1] == 1) {
+                       ggml_set_param(a);
+                   }
+                   ggml_set_param(b);
                }
-               ggml_set_param(ctx, b);
            }
            ggml_set_name(a, "a");
            ggml_set_name(b, "b");
@@ -2201,7 +2211,7 @@ struct test_sqr : public test_case {
 
    ggml_tensor * build_graph(ggml_context * ctx) override {
        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-       ggml_set_param(ctx, a);
+       ggml_set_param(a);
        ggml_set_name(a, "a");
 
        ggml_tensor * out = ggml_sqr(ctx, a);
@@ -2230,7 +2240,7 @@ struct test_sqrt : public test_case {
 
    ggml_tensor * build_graph(ggml_context * ctx) override {
        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-       ggml_set_param(ctx, a);
+       ggml_set_param(a);
        ggml_set_name(a, "a");
 
        ggml_tensor * out = ggml_sqrt(ctx, a);
@@ -2270,7 +2280,7 @@ struct test_log : public test_case {
 
    ggml_tensor * build_graph(ggml_context * ctx) override {
        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-       ggml_set_param(ctx, a);
+       ggml_set_param(a);
        ggml_set_name(a, "a");
 
        ggml_tensor * out = ggml_log(ctx, a);
@@ -2306,7 +2316,7 @@ struct test_sin : public test_case {
 
    ggml_tensor * build_graph(ggml_context * ctx) override {
        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-       ggml_set_param(ctx, a);
+       ggml_set_param(a);
        ggml_set_name(a, "a");
 
        ggml_tensor * out = ggml_sin(ctx, a);
@@ -2349,7 +2359,7 @@ struct test_cos : public test_case {
 
    ggml_tensor * build_graph(ggml_context * ctx) override {
        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-       ggml_set_param(ctx, a);
+       ggml_set_param(a);
        ggml_set_name(a, "a");
 
        ggml_tensor * out = ggml_cos(ctx, a);
@@ -2429,7 +2439,7 @@ struct test_diag_mask_inf : public test_case {
 
    ggml_tensor * build_graph(ggml_context * ctx) override {
        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-       ggml_set_param(ctx, a);
+       ggml_set_param(a);
        ggml_set_name(a, "a");
 
        ggml_tensor * out = ggml_diag_mask_inf(ctx, a, n_past);
@@ -2468,7 +2478,7 @@ struct test_soft_max : public test_case {
 
    ggml_tensor * build_graph(ggml_context * ctx) override {
        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-       ggml_set_param(ctx, a);
+       ggml_set_param(a);
        ggml_set_name(a, "a");
 
        ggml_tensor * mask = nullptr;
@@ -2550,7 +2560,7 @@ struct test_rope : public test_case {
            auto ne = ne_a; ne[0] *= 2; ne[1] *= 4; ne[2] *= 3;
            a = ggml_new_tensor(ctx, type, 4, ne.data());
            if (forward) {
-               ggml_set_param(ctx, a);
+               ggml_set_param(a);
            }
            ggml_set_name(a, "a");
 
@@ -2559,7 +2569,7 @@ struct test_rope : public test_case {
        } else {
            a = ggml_new_tensor(ctx, type, 4, ne_a.data());
            if (forward) {
-               ggml_set_param(ctx, a);
+               ggml_set_param(a);
            }
            ggml_set_name(a, "a");
        }
@@ -2673,7 +2683,7 @@ struct test_pool2d : public test_case {
 
    ggml_tensor * build_graph(ggml_context * ctx) override {
        ggml_tensor * input = ggml_new_tensor(ctx, type_input, 4, ne_input.data());
-       ggml_set_param(ctx, input);
+       ggml_set_param(input);
        ggml_set_name(input, "input");
 
        ggml_tensor * out = ggml_pool_2d(ctx, input, pool_type, k0, k1, s0, s1, p0, p1);
@@ -2749,7 +2759,7 @@ struct test_im2col : public test_case {
 
    ggml_tensor * build_graph(ggml_context * ctx) override {
        ggml_tensor * input = ggml_new_tensor(ctx, type_input, 4, ne_input.data());
-       ggml_set_param(ctx, input);
+       ggml_set_param(input);
        ggml_set_name(input, "input");
 
        ggml_tensor * kernel = ggml_new_tensor(ctx, type_kernel, 4, ne_kernel.data());
@@ -2762,6 +2772,48 @@ struct test_im2col : public test_case {
    }
 };
 
+// GGML_OP_CONV_2D_DW
+struct test_conv_2d_dw : public test_case {
+    const std::array<int64_t, 4> ne_input;
+    const std::array<int64_t, 4> ne_kernel;
+    const int stride;
+    const int padding;
+    const int dilation;
+    const bool cwhn;
+
+    std::string vars() override {
+        return VARS_TO_STR6(ne_input, ne_kernel, stride, padding, dilation, cwhn);
+    }
+
+    test_conv_2d_dw(std::array<int64_t, 4> ne_input = {64, 64, 16, 1},
+            std::array<int64_t, 4> ne_kernel = {3, 3, 1, 16},
+            int stride = 1, int padding = 0, int dilation = 1, bool cwhn = false)
+        : ne_input(ne_input), ne_kernel(ne_kernel), stride(stride), padding(padding), dilation(dilation), cwhn(cwhn) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * input = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne_input.data());
+        ggml_set_name(input, "input");
+
+        ggml_tensor * kernel = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne_kernel.data());
+        ggml_set_name(kernel, "kernel");
+
+        if (cwhn) {
+            // change memory layout to channel-most-contiguous (CWHN),
+            // then permute it back so NE matches the original input
+            input = ggml_cont(ctx, ggml_permute(ctx, input, 1, 2, 0, 3));
+            input = ggml_permute(ctx, input, 2, 0, 1, 3);
+            kernel = ggml_cont(ctx, ggml_permute(ctx, kernel, 2, 3, 1, 0));
+            kernel = ggml_permute(ctx, kernel, 3, 2, 0, 1);
+        }
+
+        ggml_tensor * out = ggml_conv_2d_dw_direct(
+            ctx, kernel, input,
+            stride, stride, padding, padding, dilation, dilation);
+        ggml_set_name(out, "out");
+        return out;
+    }
+};
+
 // GGML_OP_CONCAT
 struct test_concat : public test_case {
    const ggml_type type;
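
The new test_conv_2d_dw case exercises the depthwise 2D convolution op (GGML_OP_CONV_2D_DW) in both the default WHCN layout and a channel-contiguous CWHN layout. A minimal sketch of building the same kind of graph outside the test harness, using the default test sizes (the surrounding ggml_context setup is assumed):

    // 64x64 input with 16 channels, batch 1 (WHCN); one 3x3 kernel per channel
    ggml_tensor * input  = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 64, 64, 16, 1);
    ggml_tensor * kernel = ggml_new_tensor_4d(ctx, GGML_TYPE_F32,  3,  3,  1, 16);
    // stride 1, padding 0, dilation 1 in both dimensions, as in the default constructor above
    ggml_tensor * out = ggml_conv_2d_dw_direct(ctx, kernel, input, 1, 1, 0, 0, 1, 1);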
@@ -2884,7 +2936,7 @@ struct test_sum : public test_case {
 
    ggml_tensor * build_graph(ggml_context * ctx) override {
        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-       ggml_set_param(ctx, a);
+       ggml_set_param(a);
        ggml_set_name(a, "a");
 
        ggml_tensor * out = ggml_sum(ctx, a);
@@ -2913,7 +2965,7 @@ struct test_sum_rows : public test_case {
 
    ggml_tensor * build_graph(ggml_context * ctx) override {
        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-       ggml_set_param(ctx, a);
+       ggml_set_param(a);
        ggml_set_name(a, "a");
 
        ggml_tensor * out = ggml_sum_rows(ctx, a);
@@ -2938,7 +2990,7 @@ struct test_mean : public test_case {
 
    ggml_tensor * build_graph(ggml_context * ctx) override {
        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-       ggml_set_param(ctx, a);
+       ggml_set_param(a);
        ggml_set_name(a, "a");
 
        ggml_tensor * out = ggml_mean(ctx, a);
@@ -3084,11 +3136,11 @@ struct test_acc : public test_case {
 
    ggml_tensor * build_graph(ggml_context * ctx) override {
        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne_a.data());
-       ggml_set_param(ctx, a);
+       ggml_set_param(a);
        ggml_set_name(a, "a");
 
        ggml_tensor * b = ggml_new_tensor(ctx, type, 4, ne_b.data());
-       ggml_set_param(ctx, b);
+       ggml_set_param(b);
        ggml_set_name(b, "b");
 
        ggml_tensor * out = ggml_acc(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], b->nb[1]);
@@ -3325,7 +3377,7 @@ struct test_cross_entropy_loss : public test_case {
 
    ggml_tensor * build_graph(ggml_context * ctx) override {
        ggml_tensor * logits = ggml_new_tensor(ctx, type, 4, ne.data());
-       ggml_set_param(ctx, logits);
+       ggml_set_param(logits);
        ggml_set_name(logits, "logits");
 
        ggml_tensor * labels = ggml_new_tensor(ctx, type, 4, ne.data());
@@ -3407,7 +3459,7 @@ struct test_opt_step_adamw : public test_case {
 
    ggml_tensor * build_graph(ggml_context * ctx) override {
        ggml_tensor * a = ggml_new_tensor_4d(ctx, type, ne[0], ne[1], ne[2], ne[3]);
-       ggml_set_param(ctx, a); // Despite tensor a having gradients the output tensor will not.
+       ggml_set_param(a); // Despite tensor a having gradients the output tensor will not.
        ggml_set_name(a, "a");
 
        ggml_tensor * grad = ggml_new_tensor_4d(ctx, type, ne[0], ne[1], ne[2], ne[3]);
@@ -3972,6 +4024,11 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
    // test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {1024, 1024, 256, 1}, {3, 3, 256, 1}, 1, 1, 1, 1, 1, 1, true));
    // test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F32, {1024, 1024, 256, 1}, {3, 3, 256, 1}, 1, 1, 1, 1, 1, 1, true));
 
+   test_cases.emplace_back(new test_conv_2d_dw({17, 34, 9, 1}, {3, 3, 1, 9}, 1, 0, 1, false));
+   test_cases.emplace_back(new test_conv_2d_dw({17, 34, 9, 1}, {3, 3, 1, 9}, 1, 0, 1, true));
+   test_cases.emplace_back(new test_conv_2d_dw({32, 8, 64, 1}, {3, 3, 1, 64}, 2, 1, 1, false));
+   test_cases.emplace_back(new test_conv_2d_dw({32, 8, 64, 1}, {3, 3, 1, 64}, 2, 1, 1, true));
+
    test_cases.emplace_back(new test_conv_transpose_1d());
    test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {2,3,2,1}, 3, 0, 1));
    test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {2,3,2,1}, 2, 0, 1));
@@ -4546,6 +4603,9 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
        }
    }
 
+   test_cases.emplace_back(new test_conv_2d_dw({512, 512, 256, 1}, {3, 3, 1, 256}, 1, 1, 1, false));
+   test_cases.emplace_back(new test_conv_2d_dw({512, 512, 256, 1}, {3, 3, 1, 256}, 1, 1, 1, true));
+
    return test_cases;
 }
 
package/src/llama.cpp/tests/test-chat-template.cpp

@@ -181,21 +181,20 @@ int main(void) {
        },
        {
            /* .name= */ "ChatGLM4",
-           /* .template_str= */ U8C("[gMASK]<sop>{% for item in messages %}{% if item['tools'] is defined %}<|system|>\n你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的,你的任务是针对用户的问题和要求提供适当的答复和支持。\n\n# 可用工具{% set tools = item['tools'] %}{% for tool in tools %}{% if tool['type'] == 'function' %}\n\n## {{ tool['function']['name'] }}\n\n{{ tool['function'] | tojson(indent=4) }}\n......{% endif %}{% endfor %}{% endif %}{% if item['content'] %}<|{{ item['role'] }}|>{{ item['metadata'] }}\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}"),
-           /* .expected_output= */ "[gMASK]<sop><|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>",
+           /* .template_str= */ U8C("[gMASK]<sop>{% for item in messages %}{% if item['tools'] is defined %}<|system|>\n你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的,你的任务是针对用户的问题和要求提供适当的答复和支持。\n\n# 可用工具{% set tools = item['tools'] %}{% for tool in tools %}{% if tool['type'] == 'function' %}\n\n## {{ tool['function']['name'] }}\n\n{{ tool['function'] | tojson(indent=4) }}\n......{% endif %}{% endfor %}{% endif %}{% if item['content'] %}<|{{ item['role'] }}|>{{ item['metadata'] }}\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>\n{% endif %}"),
+           /* .expected_output= */ "[gMASK]<sop><|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>\n",
            /* .expected_output_jinja= */ "",
            /* .bos_token= */ "",
            /* .eos_token= */ "",
        },
-       // TODO @ngxson : GLMEdge produces poor result without `[gMASK]<sop>`, so we're temporarily using GLM4 template for it. We should fix this in the future.
-       // {
-       // /* .name= */ "GLMEdge",
-       // /* .template_str= */ "{% for item in messages %}{% if item['role'] == 'system' %}<|system|>\n{{ item['content'] }}{% elif item['role'] == 'user' %}<|user|>\n{{ item['content'] }}{% elif item['role'] == 'assistant' %}<|assistant|>\n{{ item['content'] }}{% endif %}{% endfor %}<|assistant|>",
-       // /* .expected_output= */ "<|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>",
-       // /* .expected_output_jinja= */ "<|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>",
-       // /* .bos_token= */ "",
-       // /* .eos_token= */ "",
-       // },
+       {
+           /* .name= */ "GLMEdge",
+           /* .template_str= */ "{% for item in messages %}{% if item['role'] == 'system' %}<|system|>\n{{ item['content'] }}{% elif item['role'] == 'user' %}<|user|>\n{{ item['content'] }}{% elif item['role'] == 'assistant' %}<|assistant|>\n{{ item['content'] }}{% endif %}{% endfor %}<|assistant|>",
+           /* .expected_output= */ "<|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>",
+           /* .expected_output_jinja= */ "<|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>",
+           /* .bos_token= */ "",
+           /* .eos_token= */ "",
+       },
        {
            /* .name= */ "MiniCPM-3B-OpenHermes-2.5-v2-GGUF",
            /* .template_str= */ U8C("{% for message in messages %}{% if message['role'] == 'user' %}{{'<用户>' + message['content'].strip() + '<AI>'}}{% else %}{{message['content'].strip()}}{% endif %}{% endfor %}"),
package/src/llama.cpp/tests/test-chat.cpp

@@ -832,7 +832,9 @@ static void test_template_output_parsers() {
        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY,
                      common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
        assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
-           common_chat_templates_apply(tmpls.get(), inputs_tools).format);
+                     common_chat_templates_apply(tmpls.get(), inputs_tools).format);
+       assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY,
+                     common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
 
        test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
package/src/llama.cpp/tests/test-mtmd-c-api.c (new file)

@@ -0,0 +1,63 @@
+#include <stdio.h>
+#include <assert.h>
+
+#include "mtmd.h"
+
+int main(void) {
+    printf("\n\nTesting libmtmd C API...\n");
+    printf("--------\n\n");
+
+    struct mtmd_context_params params = mtmd_context_params_default();
+    printf("Default image marker: %s\n", params.image_marker);
+
+    mtmd_input_chunks * chunks = mtmd_test_create_input_chunks();
+
+    if (!chunks) {
+        fprintf(stderr, "Failed to create input chunks\n");
+        return 1;
+    }
+
+    size_t n_chunks = mtmd_input_chunks_size(chunks);
+    printf("Number of chunks: %zu\n", n_chunks);
+    assert(n_chunks > 0);
+
+    for (size_t i = 0; i < n_chunks; i++) {
+        const mtmd_input_chunk * chunk = mtmd_input_chunks_get(chunks, i);
+        assert(chunk != NULL);
+        enum mtmd_input_chunk_type type = mtmd_input_chunk_get_type(chunk);
+        printf("Chunk %zu type: %d\n", i, type);
+
+        if (type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
+            size_t n_tokens;
+            const llama_token * tokens = mtmd_input_chunk_get_tokens_text(chunk, &n_tokens);
+            printf("  Text chunk with %zu tokens\n", n_tokens);
+            assert(tokens != NULL);
+            assert(n_tokens > 0);
+            for (size_t j = 0; j < n_tokens; j++) {
+                assert(tokens[j] >= 0);
+                printf("    > Token %zu: %d\n", j, tokens[j]);
+            }
+
+        } else if (type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
+            const mtmd_image_tokens * image_tokens = mtmd_input_chunk_get_tokens_image(chunk);
+            size_t n_tokens = mtmd_image_tokens_get_n_tokens(image_tokens);
+            size_t nx = mtmd_image_tokens_get_nx(image_tokens);
+            size_t ny = mtmd_image_tokens_get_ny(image_tokens);
+            const char * id = mtmd_image_tokens_get_id(image_tokens);
+            assert(n_tokens > 0);
+            assert(nx > 0);
+            assert(ny > 0);
+            assert(id != NULL);
+            printf("  Image chunk with %zu tokens\n", n_tokens);
+            printf("  Image size: %zu x %zu\n", nx, ny);
+            printf("  Image ID: %s\n", id);
+        }
+    }
+
+    // Free the chunks
+    mtmd_input_chunks_free(chunks);
+
+    printf("\n\nDONE: test libmtmd C API...\n");
+
+    return 0;
+}