@fugood/llama.node 0.3.16 → 0.3.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202) hide show
  1. package/CMakeLists.txt +3 -0
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  7. package/bin/linux-cuda/x64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  9. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  10. package/bin/win32/arm64/llama-node.node +0 -0
  11. package/bin/win32/arm64/node.lib +0 -0
  12. package/bin/win32/x64/llama-node.node +0 -0
  13. package/bin/win32/x64/node.lib +0 -0
  14. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/arm64/node.lib +0 -0
  16. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  17. package/bin/win32-vulkan/x64/node.lib +0 -0
  18. package/lib/binding.ts +5 -0
  19. package/package.json +1 -1
  20. package/src/LlamaCompletionWorker.cpp +8 -0
  21. package/src/LlamaCompletionWorker.h +1 -0
  22. package/src/LlamaContext.cpp +3 -2
  23. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +124 -0
  24. package/src/llama.cpp/.github/workflows/build.yml +70 -27
  25. package/src/llama.cpp/.github/workflows/docker.yml +6 -6
  26. package/src/llama.cpp/.github/workflows/server.yml +7 -11
  27. package/src/llama.cpp/CMakeLists.txt +23 -1
  28. package/src/llama.cpp/common/CMakeLists.txt +6 -3
  29. package/src/llama.cpp/common/arg.cpp +809 -105
  30. package/src/llama.cpp/common/arg.h +9 -0
  31. package/src/llama.cpp/common/chat.cpp +1 -1
  32. package/src/llama.cpp/common/common.cpp +31 -521
  33. package/src/llama.cpp/common/common.h +17 -36
  34. package/src/llama.cpp/common/json-schema-to-grammar.cpp +3 -0
  35. package/src/llama.cpp/common/llguidance.cpp +30 -47
  36. package/src/llama.cpp/common/minja/chat-template.hpp +15 -7
  37. package/src/llama.cpp/common/minja/minja.hpp +119 -93
  38. package/src/llama.cpp/common/sampling.cpp +3 -0
  39. package/src/llama.cpp/docs/build.md +122 -7
  40. package/src/llama.cpp/examples/CMakeLists.txt +0 -9
  41. package/src/llama.cpp/examples/batched/batched.cpp +1 -1
  42. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +1 -1
  43. package/src/llama.cpp/examples/embedding/embedding.cpp +7 -1
  44. package/src/llama.cpp/examples/export-lora/export-lora.cpp +1 -1
  45. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +15 -16
  46. package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
  47. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +210 -8
  48. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
  49. package/src/llama.cpp/examples/llava/CMakeLists.txt +39 -24
  50. package/src/llama.cpp/examples/llava/clip-impl.h +345 -0
  51. package/src/llama.cpp/examples/llava/clip.cpp +2152 -1803
  52. package/src/llama.cpp/examples/llava/clip.h +39 -22
  53. package/src/llama.cpp/examples/llava/deprecation-warning.cpp +22 -0
  54. package/src/llama.cpp/examples/llava/llava.cpp +64 -52
  55. package/src/llama.cpp/examples/llava/mtmd-cli.cpp +344 -0
  56. package/src/llama.cpp/examples/llava/mtmd.cpp +708 -0
  57. package/src/llama.cpp/examples/llava/mtmd.h +168 -0
  58. package/src/llama.cpp/examples/llava/{qwen2vl-cli.cpp → qwen2vl-test.cpp} +83 -31
  59. package/src/llama.cpp/examples/main/main.cpp +16 -5
  60. package/src/llama.cpp/examples/parallel/parallel.cpp +3 -1
  61. package/src/llama.cpp/examples/passkey/passkey.cpp +1 -1
  62. package/src/llama.cpp/examples/perplexity/perplexity.cpp +17 -3
  63. package/src/llama.cpp/examples/quantize/quantize.cpp +115 -2
  64. package/src/llama.cpp/examples/rpc/CMakeLists.txt +4 -2
  65. package/src/llama.cpp/examples/rpc/rpc-server.cpp +163 -8
  66. package/src/llama.cpp/examples/run/CMakeLists.txt +12 -1
  67. package/src/llama.cpp/examples/run/run.cpp +14 -28
  68. package/src/llama.cpp/examples/server/httplib.h +313 -247
  69. package/src/llama.cpp/examples/server/server.cpp +238 -139
  70. package/src/llama.cpp/examples/server/utils.hpp +51 -2
  71. package/src/llama.cpp/examples/speculative/speculative.cpp +1 -1
  72. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
  73. package/src/llama.cpp/examples/sycl/build.sh +2 -2
  74. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +2 -2
  75. package/src/llama.cpp/examples/tts/tts.cpp +6 -9
  76. package/src/llama.cpp/ggml/CMakeLists.txt +8 -2
  77. package/src/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
  78. package/src/llama.cpp/ggml/include/ggml-cpu.h +5 -0
  79. package/src/llama.cpp/ggml/include/ggml-rpc.h +6 -1
  80. package/src/llama.cpp/ggml/include/ggml.h +66 -99
  81. package/src/llama.cpp/ggml/src/CMakeLists.txt +10 -7
  82. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -2
  83. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +8 -4
  84. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +5 -5
  85. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +692 -1534
  86. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +613 -122
  87. package/src/llama.cpp/ggml/src/ggml-cann/common.h +135 -1
  88. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +507 -137
  89. package/src/llama.cpp/ggml/src/ggml-common.h +12 -6
  90. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +48 -22
  91. package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
  92. package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
  93. package/src/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
  94. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +1 -1
  95. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +896 -192
  96. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +2 -21
  97. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +754 -404
  98. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1003 -13519
  99. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +2 -0
  100. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +2 -7
  101. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +0 -1
  102. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +3 -4
  103. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +533 -88
  104. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8809 -0
  105. package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
  106. package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
  107. package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
  108. package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
  109. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +258 -0
  110. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
  111. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +7 -0
  112. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +1 -0
  113. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -4
  114. package/src/llama.cpp/ggml/src/ggml-impl.h +52 -18
  115. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +70 -3
  116. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +67 -119
  117. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1023 -260
  118. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +293 -40
  119. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +96 -22
  120. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
  121. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +350 -0
  122. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
  123. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -35
  124. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +2 -292
  125. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +79 -90
  126. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +967 -438
  127. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +22 -23
  128. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +24 -20
  129. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +1 -4
  130. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +204 -280
  131. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +84 -74
  132. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +1 -3
  133. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +37 -49
  134. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +7 -22
  135. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +4 -14
  136. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +204 -118
  137. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +1 -3
  138. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +23 -0
  139. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +646 -114
  140. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +12 -0
  141. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +17 -8
  142. package/src/llama.cpp/ggml/src/ggml.c +141 -245
  143. package/src/llama.cpp/ggml/src/gguf.cpp +1 -0
  144. package/src/llama.cpp/include/llama.h +30 -11
  145. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
  146. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
  147. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
  148. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
  149. package/src/llama.cpp/requirements/requirements-all.txt +2 -0
  150. package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
  151. package/src/llama.cpp/src/CMakeLists.txt +3 -2
  152. package/src/llama.cpp/src/llama-adapter.cpp +37 -1
  153. package/src/llama.cpp/src/llama-arch.cpp +160 -17
  154. package/src/llama.cpp/src/llama-arch.h +16 -0
  155. package/src/llama.cpp/src/llama-chat.cpp +82 -17
  156. package/src/llama.cpp/src/llama-chat.h +6 -2
  157. package/src/llama.cpp/src/llama-context.cpp +108 -92
  158. package/src/llama.cpp/src/llama-context.h +1 -2
  159. package/src/llama.cpp/src/llama-graph.cpp +189 -119
  160. package/src/llama.cpp/src/llama-graph.h +26 -6
  161. package/src/llama.cpp/src/llama-hparams.h +13 -0
  162. package/src/llama.cpp/src/llama-kv-cache.cpp +70 -123
  163. package/src/llama.cpp/src/llama-kv-cache.h +41 -115
  164. package/src/llama.cpp/src/llama-memory.h +1 -1
  165. package/src/llama.cpp/src/llama-mmap.cpp +1 -1
  166. package/src/llama.cpp/src/llama-model-loader.cpp +10 -5
  167. package/src/llama.cpp/src/llama-model-loader.h +5 -3
  168. package/src/llama.cpp/src/llama-model.cpp +1760 -534
  169. package/src/llama.cpp/src/llama-model.h +13 -1
  170. package/src/llama.cpp/src/llama-quant.cpp +29 -8
  171. package/src/llama.cpp/src/llama-sampling.cpp +7 -1
  172. package/src/llama.cpp/src/llama-vocab.cpp +44 -6
  173. package/src/llama.cpp/src/llama.cpp +1 -1
  174. package/src/llama.cpp/tests/CMakeLists.txt +43 -30
  175. package/src/llama.cpp/tests/test-arg-parser.cpp +51 -4
  176. package/src/llama.cpp/tests/test-backend-ops.cpp +82 -43
  177. package/src/llama.cpp/tests/test-chat-template.cpp +34 -13
  178. package/src/llama.cpp/tests/test-chat.cpp +12 -2
  179. package/src/llama.cpp/{examples/gbnf-validator/gbnf-validator.cpp → tests/test-gbnf-validator.cpp} +2 -2
  180. package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -2
  181. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +63 -2
  182. package/src/llama.cpp/tests/test-grammar-parser.cpp +3 -1
  183. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -1
  184. package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -1
  185. package/src/llama.cpp/{examples/quantize-stats/quantize-stats.cpp → tests/test-quantize-stats.cpp} +3 -1
  186. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +2 -1
  187. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +2 -1
  188. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +0 -5
  189. package/src/llama.cpp/examples/llava/gemma3-cli.cpp +0 -341
  190. package/src/llama.cpp/examples/llava/llava-cli.cpp +0 -332
  191. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +0 -354
  192. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -6
  193. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
  194. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
  195. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
  196. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
  197. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
  198. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
  199. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
  200. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
  201. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
  202. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
@@ -1,6 +1,7 @@
1
1
  #ifndef CLIP_H
2
2
  #define CLIP_H
3
3
 
4
+ #include "ggml.h"
4
5
  #include <stddef.h>
5
6
  #include <stdint.h>
6
7
 
@@ -29,19 +30,13 @@ struct clip_image_size {
29
30
  int height;
30
31
  };
31
32
 
32
- struct clip_image_u8_batch {
33
- struct clip_image_u8 * data;
34
- size_t size;
35
- };
36
-
37
- struct clip_image_f32_batch {
38
- struct clip_image_f32 * data;
39
- size_t size;
40
- };
33
+ struct clip_image_f32;
34
+ struct clip_image_u8_batch;
35
+ struct clip_image_f32_batch;
41
36
 
42
37
  struct clip_context_params {
43
38
  bool use_gpu;
44
- int verbosity;
39
+ enum ggml_log_level verbosity;
45
40
  };
46
41
 
47
42
  // deprecated, use clip_init
@@ -52,11 +47,11 @@ CLIP_API struct clip_ctx * clip_init(const char * fname, struct clip_context_par
52
47
  CLIP_API void clip_free(struct clip_ctx * ctx);
53
48
 
54
49
  CLIP_API size_t clip_embd_nbytes(const struct clip_ctx * ctx);
55
- CLIP_API size_t clip_embd_nbytes_by_img(const struct clip_ctx * ctx, int img_h, int img_w);
50
+ CLIP_API size_t clip_embd_nbytes_by_img(const struct clip_ctx * ctx, int img_w, int img_h);
56
51
 
57
- CLIP_API int32_t clip_image_size (const struct clip_ctx * ctx);
58
- CLIP_API int32_t clip_patch_size (const struct clip_ctx * ctx);
59
- CLIP_API int32_t clip_hidden_size(const struct clip_ctx * ctx);
52
+ CLIP_API int32_t clip_get_image_size (const struct clip_ctx * ctx);
53
+ CLIP_API int32_t clip_get_patch_size (const struct clip_ctx * ctx);
54
+ CLIP_API int32_t clip_get_hidden_size(const struct clip_ctx * ctx);
60
55
 
61
56
  // TODO: should be enum, not string
62
57
  CLIP_API const char * clip_patch_merge_type(const struct clip_ctx * ctx);
@@ -64,23 +59,45 @@ CLIP_API const char * clip_patch_merge_type(const struct clip_ctx * ctx);
64
59
  CLIP_API const int32_t * clip_image_grid(const struct clip_ctx * ctx);
65
60
  CLIP_API size_t get_clip_image_grid_size(const struct clip_ctx * ctx);
66
61
 
67
- CLIP_API int clip_n_patches (const struct clip_ctx * ctx);
68
- CLIP_API int clip_n_patches_by_img (const struct clip_ctx * ctx, struct clip_image_f32 * img);
69
- CLIP_API int clip_n_mmproj_embd (const struct clip_ctx * ctx);
62
+ GGML_DEPRECATED(CLIP_API int clip_n_patches(const struct clip_ctx * ctx),
63
+ "use clip_n_output_tokens instead");
64
+ GGML_DEPRECATED(CLIP_API int clip_n_patches_by_img(const struct clip_ctx * ctx, struct clip_image_f32 * img),
65
+ "use clip_n_output_tokens instead");
66
+
67
+ CLIP_API int clip_n_output_tokens(const struct clip_ctx * ctx, struct clip_image_f32 * img);
68
+
69
+ // for M-RoPE, this will be the number of token positions in X and Y directions
70
+ // for other models, X will be the total number of tokens and Y will be 1
71
+ CLIP_API int clip_n_output_tokens_x(const struct clip_ctx * ctx, struct clip_image_f32 * img);
72
+ CLIP_API int clip_n_output_tokens_y(const struct clip_ctx * ctx, struct clip_image_f32 * img);
73
+
74
+ // this should be equal to the embedding dimension of the text model
75
+ CLIP_API int clip_n_mmproj_embd(const struct clip_ctx * ctx);
70
76
 
71
77
  CLIP_API int clip_uhd_num_image_embeds_col(struct clip_ctx * ctx_clip);
72
78
  CLIP_API void clip_add_load_image_size(struct clip_ctx * ctx_clip, struct clip_image_size * load_image_size);
73
79
  CLIP_API struct clip_image_size * clip_get_load_image_size(struct clip_ctx * ctx_clip);
74
80
 
75
- CLIP_API struct clip_image_size * clip_image_size_init();
76
- CLIP_API struct clip_image_u8 * clip_image_u8_init ();
77
- CLIP_API struct clip_image_f32 * clip_image_f32_init();
81
+ CLIP_API struct clip_image_size * clip_image_size_init();
82
+ CLIP_API struct clip_image_u8 * clip_image_u8_init ();
83
+ CLIP_API struct clip_image_f32 * clip_image_f32_init();
84
+ CLIP_API struct clip_image_f32_batch * clip_image_f32_batch_init(); // only used by libllava
85
+
86
+ // nx, ny are the output image dimensions
87
+ CLIP_API unsigned char * clip_image_u8_get_data(struct clip_image_u8 * img, uint32_t * nx, uint32_t * ny);
78
88
 
89
+ CLIP_API void clip_image_size_free (struct clip_image_size * img_size);
79
90
  CLIP_API void clip_image_u8_free (struct clip_image_u8 * img);
80
91
  CLIP_API void clip_image_f32_free(struct clip_image_f32 * img);
81
92
  CLIP_API void clip_image_u8_batch_free (struct clip_image_u8_batch * batch);
82
93
  CLIP_API void clip_image_f32_batch_free(struct clip_image_f32_batch * batch);
83
94
 
95
+ // used for accessing the underlying data of clip_image_f32_batch
96
+ CLIP_API size_t clip_image_f32_batch_n_images(const struct clip_image_f32_batch * batch); // equivalent to batch->size()
97
+ CLIP_API size_t clip_image_f32_batch_nx(const struct clip_image_f32_batch * batch, int idx); // equivalent to batch[idx]->nx
98
+ CLIP_API size_t clip_image_f32_batch_ny(const struct clip_image_f32_batch * batch, int idx); // equivalent to batch[idx]->ny
99
+ CLIP_API struct clip_image_f32 * clip_image_f32_get_img(const struct clip_image_f32_batch * batch, int idx); // equivalent to batch[idx]->data
100
+
84
101
  /**
85
102
  * Build image from pixels decoded by other libraries instead of stb_image.h for better performance.
86
103
  * The memory layout is RGBRGBRGB..., input buffer length must be 3*nx*ny bytes
@@ -105,8 +122,8 @@ CLIP_API bool clip_model_quantize(const char * fname_inp, const char * fname_out
105
122
  CLIP_API int clip_is_minicpmv(const struct clip_ctx * ctx);
106
123
  CLIP_API bool clip_is_glm(const struct clip_ctx * ctx);
107
124
  CLIP_API bool clip_is_qwen2vl(const struct clip_ctx * ctx);
108
-
109
- CLIP_API int get_deepest_feature_layer(const struct clip_ctx * ctx);
125
+ CLIP_API bool clip_is_llava(const struct clip_ctx * ctx);
126
+ CLIP_API bool clip_is_gemma3(const struct clip_ctx * ctx);
110
127
 
111
128
  CLIP_API bool clip_encode_float_image (struct clip_ctx * ctx, int n_threads, float * img, int h, int w, float * vec);
112
129
 
@@ -0,0 +1,22 @@
1
+ #include <cstdio>
2
+ #include <string>
3
+
4
+ int main(int argc, char** argv) { // stub for a deprecated binary: prints a notice pointing to llama-mtmd-cli and exits with a failure status
5
+ std::string filename = "main";
6
+ if (argc >= 1) {
7
+ filename = argv[0];
8
+ }
9
+
10
+ // Get only the program name from the full path
11
+ size_t pos = filename.find_last_of("/\\");
12
+ if (pos != std::string::npos) {
13
+ filename = filename.substr(pos+1);
14
+ }
15
+
16
+ fprintf(stdout, "\n");
17
+ fprintf(stdout, "WARNING: The binary '%s' is deprecated.\n", filename.c_str());
18
+ fprintf(stdout, "Please use 'llama-mtmd-cli' instead.\n");
19
+ fprintf(stdout, "\n");
20
+
21
+ return 1; // failure status; EXIT_FAILURE is declared in <cstdlib>, which this file does not include
22
+ }
@@ -10,6 +10,7 @@
10
10
  #include <cstring>
11
11
  #include <limits>
12
12
  #include <vector>
13
+ #include <memory>
13
14
 
14
15
  #if defined(LLAVA_LOG_OFF)
15
16
  # define LOG_INF(...)
@@ -45,6 +46,17 @@ struct clip_image_grid_shape {
45
46
  int second;
46
47
  };
47
48
 
49
+ // convenience C++ wrapper: std::unique_ptr alias whose deleter releases the batch via clip_image_f32_batch_free
50
+ struct clip_image_f32_batch_deleter {
51
+ void operator()(clip_image_f32_batch * val) { clip_image_f32_batch_free(val); }
52
+ };
53
+ typedef std::unique_ptr<clip_image_f32_batch, clip_image_f32_batch_deleter> clip_image_f32_batch_ptr;
54
+
55
+ struct clip_image_size_deleter { // deleter for clip_image_size_ptr: must accept clip_image_size* and release it with clip_image_size_free
56
+ void operator()(clip_image_size * val) { clip_image_size_free(val); }
57
+ };
58
+ typedef std::unique_ptr<clip_image_size, clip_image_size_deleter> clip_image_size_ptr;
59
+
48
60
  /**
49
61
  * Selects the best resolution from a list of possible resolutions based on the original size.
50
62
  *
@@ -100,13 +112,13 @@ static struct clip_image_grid_shape get_anyres_image_grid_shape(const std::pair<
100
112
  }
101
113
 
102
114
  // Take the image segments in a grid configuration and return the embeddings and the number of embeddings into preallocated memory (image_embd_out)
103
- static bool clip_llava_handle_patches(clip_ctx * ctx_clip, std::vector<float *> & image_embd_v, struct clip_image_grid_shape grid_shape, float * image_embd_out, int * n_img_pos_out) {
115
+ static bool clip_llava_handle_patches(clip_ctx * ctx_clip, std::vector<float *> & image_embd_v, struct clip_image_grid_shape grid_shape, float * image_embd_out, int * n_img_pos_out, clip_image_f32 * img_input) {
104
116
  struct {
105
117
  struct ggml_context * ctx;
106
118
  } model;
107
119
 
108
- const int32_t image_size = clip_image_size(ctx_clip);
109
- const int32_t patch_size = clip_patch_size(ctx_clip);
120
+ const int32_t image_size = clip_get_image_size(ctx_clip);
121
+ const int32_t patch_size = clip_get_patch_size(ctx_clip);
110
122
 
111
123
  int32_t num_patches_per_side = image_size / patch_size; // 336 / 14 = 24 - used for embedding-patching boxes (24*24 = 576 patches)
112
124
 
@@ -163,7 +175,7 @@ static bool clip_llava_handle_patches(clip_ctx * ctx_clip, std::vector<float *>
163
175
 
164
176
  model.ctx = ggml_init(params);
165
177
 
166
- struct ggml_tensor * image_features = ggml_new_tensor_3d(model.ctx, GGML_TYPE_F32, clip_n_mmproj_embd(ctx_clip), clip_n_patches(ctx_clip), num_images - 1); // example: 4096 x 576 x 4
178
+ struct ggml_tensor * image_features = ggml_new_tensor_3d(model.ctx, GGML_TYPE_F32, clip_n_mmproj_embd(ctx_clip), clip_n_output_tokens(ctx_clip, img_input), num_images - 1); // example: 4096 x 576 x 4
167
179
  // ggml_tensor_printf(image_features,"image_features",__LINE__,false,false);
168
180
  // fill it with the image embeddings, ignoring the base
169
181
  for (size_t i = 1; i < num_images; i++) {
@@ -202,8 +214,8 @@ static bool clip_llava_handle_patches(clip_ctx * ctx_clip, std::vector<float *>
202
214
 
203
215
  memcpy(image_embd_out, image_embd_v[0], clip_embd_nbytes(ctx_clip)); // main image as global context
204
216
  // append without newline tokens (default behavior in llava_arch when not using unpad ):
205
- memcpy(image_embd_out + clip_n_patches(ctx_clip) * clip_n_mmproj_embd(ctx_clip), (float*)result->data, clip_embd_nbytes(ctx_clip) * (num_images-1)); // grid patches
206
- *n_img_pos_out = static_cast<int>(result->ne[1]+clip_n_patches(ctx_clip));
217
+ memcpy(image_embd_out + clip_n_output_tokens(ctx_clip, img_input) * clip_n_mmproj_embd(ctx_clip), (float*)result->data, clip_embd_nbytes(ctx_clip) * (num_images-1)); // grid patches
218
+ *n_img_pos_out = static_cast<int>(result->ne[1]+clip_n_output_tokens(ctx_clip, img_input));
207
219
 
208
220
  // Debug: Test single segments
209
221
  // Current findings: sending base image, sending a segment embedding all works similar to python
@@ -246,12 +258,9 @@ static clip_image_f32 * reshape_by_patch(clip_image_f32 * image, int patch_size)
246
258
 
247
259
  static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const clip_image_u8 * img, float * image_embd, int * n_img_pos) {
248
260
  // std::vector<clip_image_f32*> img_res_v; // format VectN x H x W x RGB (N x 336 x 336 x 3), so interleaved RGB - different to the python implementation which is N x 3 x 336 x 336
249
- clip_image_f32_batch img_res_v;
250
- img_res_v.size = 0;
251
- img_res_v.data = nullptr;
252
- if (!clip_image_preprocess(ctx_clip, img, &img_res_v)) {
261
+ clip_image_f32_batch_ptr img_res_v(clip_image_f32_batch_init());
262
+ if (!clip_image_preprocess(ctx_clip, img, img_res_v.get())) {
253
263
  LOG_ERR("%s: unable to preprocess image\n", __func__);
254
- delete[] img_res_v.data;
255
264
  return false;
256
265
  }
257
266
 
@@ -259,66 +268,72 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
259
268
 
260
269
  const char * mm_patch_merge_type = clip_patch_merge_type(ctx_clip);
261
270
 
271
+ const size_t n_imgs = clip_image_f32_batch_n_images(img_res_v.get());
272
+
262
273
  if (clip_is_minicpmv(ctx_clip) || clip_is_qwen2vl(ctx_clip)) {
263
274
  std::vector<float *> image_embd_v;
264
- image_embd_v.resize(img_res_v.size);
265
- struct clip_image_size * load_image_size = clip_image_size_init();
275
+ image_embd_v.resize(n_imgs);
276
+ clip_image_size load_image_size;
266
277
 
267
- for (size_t i = 0; i < img_res_v.size; i++) {
278
+ for (size_t i = 0; i < n_imgs; i++) {
268
279
  const int64_t t_img_enc_step_start_us = ggml_time_us();
269
- image_embd_v[i] = (float *)malloc(clip_embd_nbytes_by_img(ctx_clip, img_res_v.data[i].nx, img_res_v.data[i].ny));
270
- int patch_size=14;
271
- load_image_size->width = img_res_v.data[i].nx;
272
- load_image_size->height = img_res_v.data[i].ny;
273
- clip_add_load_image_size(ctx_clip, load_image_size);
280
+ int nx = clip_image_f32_batch_nx(img_res_v.get(), i);
281
+ int ny = clip_image_f32_batch_ny(img_res_v.get(), i);
282
+ image_embd_v[i] = (float *)malloc(clip_embd_nbytes_by_img(ctx_clip, nx, ny));
283
+ int patch_size = 14;
284
+ load_image_size.width = nx;
285
+ load_image_size.height = ny;
286
+ clip_add_load_image_size(ctx_clip, &load_image_size);
274
287
 
275
288
  bool encoded = false;
289
+ clip_image_f32 * img_res = clip_image_f32_get_img(img_res_v.get(), i);
276
290
  if (clip_is_qwen2vl(ctx_clip)) {
277
- encoded = clip_image_encode(ctx_clip, n_threads, &img_res_v.data[i], image_embd_v[i]);
291
+ encoded = clip_image_encode(ctx_clip, n_threads, img_res, image_embd_v[i]);
278
292
  }
279
293
  else {
280
- encoded = clip_image_encode(ctx_clip, n_threads, reshape_by_patch(&img_res_v.data[i], patch_size), image_embd_v[i]);
294
+ encoded = clip_image_encode(ctx_clip, n_threads, reshape_by_patch(img_res, patch_size), image_embd_v[i]);
281
295
  }
282
296
 
283
297
  if (!encoded) {
284
- LOG_ERR("Unable to encode image - spatial_unpad - subimage %d of %d\n", (int) i+1, (int) img_res_v.size);
298
+ LOG_ERR("Unable to encode image - spatial_unpad - subimage %d of %d\n", (int) i+1, (int) n_imgs);
285
299
  return false;
286
300
  }
287
301
  const int64_t t_img_enc_steop_batch_us = ggml_time_us();
288
- LOG_INF("%s: step %d of %d encoded in %8.2f ms\n", __func__, (int)i+1, (int)img_res_v.size, (t_img_enc_steop_batch_us - t_img_enc_step_start_us) / 1000.0);
302
+ LOG_INF("%s: step %d of %d encoded in %8.2f ms\n", __func__, (int)i+1, (int)n_imgs, (t_img_enc_steop_batch_us - t_img_enc_step_start_us) / 1000.0);
289
303
  }
290
304
  const int64_t t_img_enc_batch_us = ggml_time_us();
291
- LOG_INF("%s: all %d segments encoded in %8.2f ms\n", __func__, (int)img_res_v.size, (t_img_enc_batch_us - t_img_enc_start_us) / 1000.0);
305
+ LOG_INF("%s: all %d segments encoded in %8.2f ms\n", __func__, (int)n_imgs, (t_img_enc_batch_us - t_img_enc_start_us) / 1000.0);
292
306
 
293
307
  int n_img_pos_out = 0;
294
308
  for (size_t i = 0; i < image_embd_v.size(); i++) {
309
+ int nx = clip_image_f32_batch_nx(img_res_v.get(), i);
310
+ int ny = clip_image_f32_batch_ny(img_res_v.get(), i);
311
+ clip_image_f32 * img_res = clip_image_f32_get_img(img_res_v.get(), i);
295
312
  std::memcpy(
296
313
  image_embd + n_img_pos_out * clip_n_mmproj_embd(ctx_clip),
297
314
  image_embd_v[i],
298
- clip_embd_nbytes_by_img(ctx_clip, img_res_v.data[i].nx, img_res_v.data[i].ny));
299
- n_img_pos_out += clip_n_patches_by_img(ctx_clip, &img_res_v.data[i]);
315
+ clip_embd_nbytes_by_img(ctx_clip, nx, ny));
316
+ n_img_pos_out += clip_n_output_tokens(ctx_clip, img_res);
300
317
  }
301
318
  *n_img_pos = n_img_pos_out;
302
319
  for (size_t i = 0; i < image_embd_v.size(); i++) {
303
320
  free(image_embd_v[i]);
304
321
  }
305
322
  image_embd_v.clear();
306
- load_image_size->width = img->nx;
307
- load_image_size->height = img->ny;
308
- clip_add_load_image_size(ctx_clip, load_image_size);
309
- LOG_INF("%s: load_image_size %d %d\n", __func__, load_image_size->width, load_image_size->height);
310
- delete[] img_res_v.data;
311
- img_res_v.size = 0;
312
- img_res_v.data = nullptr;
323
+ load_image_size.width = img->nx;
324
+ load_image_size.height = img->ny;
325
+ clip_add_load_image_size(ctx_clip, &load_image_size);
326
+ LOG_INF("%s: load_image_size %d %d\n", __func__, load_image_size.width, load_image_size.height);
313
327
  }
314
328
  else if (clip_is_glm(ctx_clip)){
315
329
  struct clip_image_size * load_image_size = clip_image_size_init();
316
- load_image_size->width = img_res_v.data[0].nx;
317
- load_image_size->height = img_res_v.data[0].ny;
330
+ load_image_size->width = clip_image_f32_batch_nx(img_res_v.get(), 0);
331
+ load_image_size->height = clip_image_f32_batch_ny(img_res_v.get(), 0);
318
332
  clip_add_load_image_size(ctx_clip, load_image_size);
319
333
 
320
- bool encoded = clip_image_encode(ctx_clip, n_threads, &img_res_v.data[0], image_embd);
321
- int pos = int(load_image_size->width/clip_patch_size(ctx_clip)/2);
334
+ clip_image_f32 * img_res = clip_image_f32_get_img(img_res_v.get(), 0);
335
+ bool encoded = clip_image_encode(ctx_clip, n_threads, img_res, image_embd);
336
+ int pos = int(load_image_size->width/clip_get_patch_size(ctx_clip)/2);
322
337
  *n_img_pos = (pos * pos + 2);
323
338
  if (!encoded){
324
339
  LOG_ERR("Unable to encode image \n");
@@ -327,9 +342,9 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
327
342
  }
328
343
  else if (strcmp(mm_patch_merge_type, "spatial_unpad") != 0) {
329
344
  // flat / default llava-1.5 type embedding
330
- *n_img_pos = clip_n_patches(ctx_clip);
331
- bool encoded = clip_image_encode(ctx_clip, n_threads, &img_res_v.data[0], image_embd); // image_embd shape is 576 x 4096
332
- delete[] img_res_v.data;
345
+ clip_image_f32 * img_res = clip_image_f32_get_img(img_res_v.get(), 0);
346
+ *n_img_pos = clip_n_output_tokens(ctx_clip, img_res);
347
+ bool encoded = clip_image_encode(ctx_clip, n_threads, img_res, image_embd); // image_embd shape is 576 x 4096
333
348
  if (!encoded) {
334
349
  LOG_ERR("Unable to encode image\n");
335
350
 
@@ -340,17 +355,18 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
340
355
  // spatial_unpad llava-1.6 type embedding
341
356
  // TODO: CLIP needs batching support - in HF the llm projection is separate after encoding, which might be a solution to quickly get batching working
342
357
  std::vector<float *> image_embd_v;
343
- image_embd_v.resize(img_res_v.size);
344
- for (size_t i = 0; i < img_res_v.size; i++) {
358
+ image_embd_v.resize(n_imgs);
359
+ for (size_t i = 0; i < n_imgs; i++) {
360
+ clip_image_f32 * img_res = clip_image_f32_get_img(img_res_v.get(), i);
345
361
  image_embd_v[i] = (float *)malloc(clip_embd_nbytes(ctx_clip)); // 576 patches * 4096 embeddings * 4 bytes = 9437184
346
- const bool encoded = clip_image_encode(ctx_clip, n_threads, &img_res_v.data[i], image_embd_v[i]); // image data is in 3x336x336 format and will be converted to 336x336x3 inside
362
+ const bool encoded = clip_image_encode(ctx_clip, n_threads, img_res, image_embd_v[i]); // image data is in 3x336x336 format and will be converted to 336x336x3 inside
347
363
  if (!encoded) {
348
- LOG_ERR("Unable to encode image - spatial_unpad - subimage %d of %d\n", (int) i+1, (int) img_res_v.size);
364
+ LOG_ERR("Unable to encode image - spatial_unpad - subimage %d of %d\n", (int) i+1, (int) n_imgs);
349
365
  return false;
350
366
  }
351
367
  }
352
368
  const int64_t t_img_enc_batch_us = ggml_time_us();
353
- LOG_INF("%s: %d segments encoded in %8.2f ms\n", __func__, (int)img_res_v.size, (t_img_enc_batch_us - t_img_enc_start_us) / 1000.0);
369
+ LOG_INF("%s: %d segments encoded in %8.2f ms\n", __func__, (int)n_imgs, (t_img_enc_batch_us - t_img_enc_start_us) / 1000.0);
354
370
 
355
371
  const int32_t * image_grid = clip_image_grid(ctx_clip);
356
372
  const size_t num_gridpoints = get_clip_image_grid_size(ctx_clip);
@@ -360,17 +376,13 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
360
376
  grid_pinpoints.push_back({image_grid[i], image_grid[i+1]});
361
377
  }
362
378
 
363
- // free all img_res_v - not needed anymore
364
- delete[] img_res_v.data;
365
- img_res_v.size = 0;
366
- img_res_v.data = nullptr;
367
-
368
- const int32_t image_size = clip_image_size(ctx_clip);
379
+ const int32_t image_size = clip_get_image_size(ctx_clip);
369
380
 
370
381
  struct clip_image_grid_shape grid_shape = get_anyres_image_grid_shape({img->nx,img->ny}, grid_pinpoints, image_size);
371
382
 
372
383
  int n_img_pos_out;
373
- clip_llava_handle_patches(ctx_clip, image_embd_v, grid_shape, image_embd, &n_img_pos_out);
384
+ clip_image_f32 * img_input = clip_image_f32_get_img(img_res_v.get(), 0);
385
+ clip_llava_handle_patches(ctx_clip, image_embd_v, grid_shape, image_embd, &n_img_pos_out, img_input);
374
386
  *n_img_pos = n_img_pos_out;
375
387
 
376
388
  for (size_t i = 0; i < image_embd_v.size(); i++) {