@fugood/llama.node 0.3.17 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193) hide show
  1. package/CMakeLists.txt +3 -1
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  7. package/bin/linux-cuda/x64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  9. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  10. package/bin/win32/arm64/llama-node.node +0 -0
  11. package/bin/win32/arm64/node.lib +0 -0
  12. package/bin/win32/x64/llama-node.node +0 -0
  13. package/bin/win32/x64/node.lib +0 -0
  14. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/arm64/node.lib +0 -0
  16. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  17. package/bin/win32-vulkan/x64/node.lib +0 -0
  18. package/lib/binding.ts +39 -2
  19. package/lib/index.js +132 -1
  20. package/lib/index.ts +203 -3
  21. package/package.json +2 -1
  22. package/src/EmbeddingWorker.cpp +1 -1
  23. package/src/LlamaCompletionWorker.cpp +366 -19
  24. package/src/LlamaCompletionWorker.h +30 -10
  25. package/src/LlamaContext.cpp +213 -5
  26. package/src/LlamaContext.h +12 -0
  27. package/src/common.hpp +15 -0
  28. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +133 -24
  29. package/src/llama.cpp/.github/workflows/build.yml +41 -762
  30. package/src/llama.cpp/.github/workflows/docker.yml +5 -2
  31. package/src/llama.cpp/.github/workflows/release.yml +716 -0
  32. package/src/llama.cpp/.github/workflows/server.yml +12 -12
  33. package/src/llama.cpp/CMakeLists.txt +5 -17
  34. package/src/llama.cpp/cmake/build-info.cmake +8 -2
  35. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
  36. package/src/llama.cpp/common/CMakeLists.txt +31 -3
  37. package/src/llama.cpp/common/arg.cpp +48 -29
  38. package/src/llama.cpp/common/chat.cpp +128 -106
  39. package/src/llama.cpp/common/chat.h +2 -0
  40. package/src/llama.cpp/common/common.cpp +37 -1
  41. package/src/llama.cpp/common/common.h +18 -9
  42. package/src/llama.cpp/common/llguidance.cpp +1 -0
  43. package/src/llama.cpp/common/minja/chat-template.hpp +9 -5
  44. package/src/llama.cpp/common/minja/minja.hpp +69 -36
  45. package/src/llama.cpp/common/regex-partial.cpp +204 -0
  46. package/src/llama.cpp/common/regex-partial.h +56 -0
  47. package/src/llama.cpp/common/sampling.cpp +57 -50
  48. package/src/llama.cpp/examples/CMakeLists.txt +2 -23
  49. package/src/llama.cpp/examples/embedding/embedding.cpp +2 -11
  50. package/src/llama.cpp/examples/parallel/parallel.cpp +86 -14
  51. package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
  52. package/src/llama.cpp/examples/training/finetune.cpp +96 -0
  53. package/src/llama.cpp/ggml/CMakeLists.txt +27 -0
  54. package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
  55. package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
  56. package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
  57. package/src/llama.cpp/ggml/include/ggml.h +10 -7
  58. package/src/llama.cpp/ggml/src/CMakeLists.txt +1 -1
  59. package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
  60. package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
  61. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +20 -13
  62. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +0 -2
  63. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +306 -6
  64. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +4 -13
  65. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +29 -16
  66. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +88 -5
  67. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -12
  68. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +264 -69
  69. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +501 -0
  70. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +0 -13
  71. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +0 -6
  72. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
  73. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +36 -11
  74. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +0 -2
  75. package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
  76. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
  77. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +41 -27
  78. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +29 -23
  79. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +9 -8
  80. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +121 -232
  81. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +7 -15
  82. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
  83. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
  84. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
  85. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
  86. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +0 -23
  87. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
  88. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +338 -166
  89. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
  90. package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
  91. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
  92. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +81 -70
  93. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +657 -193
  94. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +20 -0
  95. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +123 -29
  96. package/src/llama.cpp/ggml/src/ggml.c +29 -20
  97. package/src/llama.cpp/ggml/src/gguf.cpp +33 -33
  98. package/src/llama.cpp/include/llama.h +52 -11
  99. package/src/llama.cpp/requirements/requirements-all.txt +3 -3
  100. package/src/llama.cpp/scripts/xxd.cmake +1 -1
  101. package/src/llama.cpp/src/CMakeLists.txt +1 -0
  102. package/src/llama.cpp/src/llama-adapter.cpp +6 -0
  103. package/src/llama.cpp/src/llama-arch.cpp +3 -0
  104. package/src/llama.cpp/src/llama-batch.cpp +5 -1
  105. package/src/llama.cpp/src/llama-batch.h +2 -1
  106. package/src/llama.cpp/src/llama-chat.cpp +17 -7
  107. package/src/llama.cpp/src/llama-chat.h +1 -0
  108. package/src/llama.cpp/src/llama-context.cpp +389 -501
  109. package/src/llama.cpp/src/llama-context.h +44 -32
  110. package/src/llama.cpp/src/llama-cparams.h +1 -0
  111. package/src/llama.cpp/src/llama-graph.cpp +20 -38
  112. package/src/llama.cpp/src/llama-graph.h +12 -8
  113. package/src/llama.cpp/src/llama-kv-cache.cpp +1503 -389
  114. package/src/llama.cpp/src/llama-kv-cache.h +271 -85
  115. package/src/llama.cpp/src/llama-memory.h +11 -1
  116. package/src/llama.cpp/src/llama-model-loader.cpp +24 -15
  117. package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
  118. package/src/llama.cpp/src/llama-model-saver.h +37 -0
  119. package/src/llama.cpp/src/llama-model.cpp +316 -69
  120. package/src/llama.cpp/src/llama-model.h +8 -1
  121. package/src/llama.cpp/src/llama-quant.cpp +15 -13
  122. package/src/llama.cpp/src/llama-sampling.cpp +18 -6
  123. package/src/llama.cpp/src/llama-vocab.cpp +42 -4
  124. package/src/llama.cpp/src/llama-vocab.h +6 -0
  125. package/src/llama.cpp/src/llama.cpp +14 -0
  126. package/src/llama.cpp/tests/CMakeLists.txt +10 -2
  127. package/src/llama.cpp/tests/test-backend-ops.cpp +107 -47
  128. package/src/llama.cpp/tests/test-chat-template.cpp +10 -11
  129. package/src/llama.cpp/tests/test-chat.cpp +3 -1
  130. package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
  131. package/src/llama.cpp/tests/test-opt.cpp +33 -21
  132. package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
  133. package/src/llama.cpp/tests/test-sampling.cpp +1 -1
  134. package/src/llama.cpp/tools/CMakeLists.txt +39 -0
  135. package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +2 -2
  136. package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
  137. package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +495 -348
  138. package/src/llama.cpp/{examples → tools}/main/main.cpp +6 -9
  139. package/src/llama.cpp/{examples/llava → tools/mtmd}/CMakeLists.txt +1 -35
  140. package/src/llama.cpp/{examples/llava → tools/mtmd}/clip-impl.h +25 -5
  141. package/src/llama.cpp/{examples/llava → tools/mtmd}/clip.cpp +1440 -1349
  142. package/src/llama.cpp/tools/mtmd/clip.h +99 -0
  143. package/src/llama.cpp/{examples/llava → tools/mtmd}/mtmd-cli.cpp +70 -44
  144. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
  145. package/src/llama.cpp/{examples/llava → tools/mtmd}/mtmd.cpp +251 -281
  146. package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
  147. package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +4 -2
  148. package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +13 -76
  149. package/src/llama.cpp/{examples → tools}/rpc/rpc-server.cpp +70 -74
  150. package/src/llama.cpp/{examples → tools}/run/run.cpp +18 -4
  151. package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
  152. package/src/llama.cpp/{examples → tools}/server/server.cpp +291 -76
  153. package/src/llama.cpp/{examples → tools}/server/utils.hpp +377 -5
  154. package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
  155. package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
  156. package/src/llama.cpp/examples/infill/infill.cpp +0 -590
  157. package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
  158. package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
  159. package/src/llama.cpp/examples/llava/clip.h +0 -135
  160. package/src/llama.cpp/examples/llava/llava.cpp +0 -586
  161. package/src/llama.cpp/examples/llava/llava.h +0 -49
  162. package/src/llama.cpp/examples/llava/mtmd.h +0 -168
  163. package/src/llama.cpp/examples/llava/qwen2vl-test.cpp +0 -636
  164. /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
  165. /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
  166. /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
  167. /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
  168. /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
  169. /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
  170. /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
  171. /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
  172. /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
  173. /package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +0 -0
  174. /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
  175. /package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +0 -0
  176. /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
  177. /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
  178. /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
  179. /package/src/llama.cpp/{examples/llava → tools/mtmd}/deprecation-warning.cpp +0 -0
  180. /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
  181. /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
  182. /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
  183. /package/src/llama.cpp/{examples → tools}/rpc/CMakeLists.txt +0 -0
  184. /package/src/llama.cpp/{examples → tools}/run/CMakeLists.txt +0 -0
  185. /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
  186. /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
  187. /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
  188. /package/src/llama.cpp/{examples → tools}/server/httplib.h +0 -0
  189. /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
  190. /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
  191. /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
  192. /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
  193. /package/src/llama.cpp/{examples → tools}/tts/tts.cpp +0 -0
@@ -99,14 +99,6 @@ int main(int argc, char ** argv) {
99
99
  console::init(params.simple_io, params.use_color);
100
100
  atexit([]() { console::cleanup(); });
101
101
 
102
- if (params.logits_all) {
103
- LOG_ERR("************\n");
104
- LOG_ERR("%s: please use the 'perplexity' tool for perplexity calculations\n", __func__);
105
- LOG_ERR("************\n\n");
106
-
107
- return 0;
108
- }
109
-
110
102
  if (params.embedding) {
111
103
  LOG_ERR("************\n");
112
104
  LOG_ERR("%s: please use the 'embedding' tool for embedding calculations\n", __func__);
@@ -160,7 +152,12 @@ int main(int argc, char ** argv) {
160
152
 
161
153
  LOG_INF("%s: llama threadpool init, n_threads = %d\n", __func__, (int) params.cpuparams.n_threads);
162
154
 
163
- auto * reg = ggml_backend_dev_backend_reg(ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU));
155
+ auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
156
+ if (!cpu_dev) {
157
+ LOG_ERR("%s: no CPU backend found\n", __func__);
158
+ return 1;
159
+ }
160
+ auto * reg = ggml_backend_dev_backend_reg(cpu_dev);
164
161
  auto * ggml_threadpool_new_fn = (decltype(ggml_threadpool_new) *) ggml_backend_reg_get_proc_address(reg, "ggml_threadpool_new");
165
162
  auto * ggml_threadpool_free_fn = (decltype(ggml_threadpool_free) *) ggml_backend_reg_get_proc_address(reg, "ggml_threadpool_free");
166
163
 
@@ -1,33 +1,8 @@
1
- # llava (legacy)
2
-
3
- add_library(llava OBJECT
4
- llava.cpp
5
- llava.h
6
- clip.cpp
7
- clip.h
8
- )
9
-
10
- target_link_libraries(llava PRIVATE ggml llama ${CMAKE_THREAD_LIBS_INIT})
11
-
12
- target_include_directories(llava PUBLIC .)
13
- target_include_directories(llava PUBLIC ../..)
14
- target_include_directories(llava PUBLIC ../../common)
15
-
16
- target_compile_features(llava PRIVATE cxx_std_17)
17
-
18
- add_library(llava_static STATIC $<TARGET_OBJECTS:llava>)
19
- if (BUILD_SHARED_LIBS)
20
- set_target_properties(llava PROPERTIES POSITION_INDEPENDENT_CODE ON)
21
- target_compile_definitions(llava PRIVATE LLAMA_SHARED LLAMA_BUILD)
22
- add_library(llava_shared SHARED $<TARGET_OBJECTS:llava>)
23
- target_link_libraries(llava_shared PRIVATE ggml llama ${CMAKE_THREAD_LIBS_INIT})
24
- install(TARGETS llava_shared LIBRARY)
25
- endif()
26
-
27
1
  # mtmd
28
2
 
29
3
  add_library(mtmd OBJECT
30
4
  mtmd.cpp
5
+ mtmd-helper.cpp
31
6
  mtmd.h
32
7
  clip.cpp
33
8
  clip.h
@@ -52,12 +27,10 @@ if (BUILD_SHARED_LIBS)
52
27
  endif()
53
28
 
54
29
  if (NOT MSVC)
55
- target_compile_options(llava PRIVATE -Wno-cast-qual) # stb_image.h
56
30
  target_compile_options(mtmd PRIVATE -Wno-cast-qual) # stb_image.h
57
31
  endif()
58
32
 
59
33
  if(TARGET BUILD_INFO)
60
- add_dependencies(llava BUILD_INFO)
61
34
  add_dependencies(mtmd BUILD_INFO)
62
35
  endif()
63
36
 
@@ -72,10 +45,3 @@ set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama-mtmd-cli)
72
45
  install(TARGETS ${TARGET} RUNTIME)
73
46
  target_link_libraries(${TARGET} PRIVATE common mtmd ${CMAKE_THREAD_LIBS_INIT})
74
47
  target_compile_features(${TARGET} PRIVATE cxx_std_17)
75
-
76
- set(TARGET llama-llava-clip-quantize-cli)
77
- add_executable(${TARGET} clip-quantize-cli.cpp)
78
- set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama-llava-clip-quantize-cli)
79
- install(TARGETS ${TARGET} RUNTIME)
80
- target_link_libraries(${TARGET} PRIVATE common llava ${CMAKE_THREAD_LIBS_INIT})
81
- target_compile_features(${TARGET} PRIVATE cxx_std_17)
@@ -31,9 +31,7 @@
31
31
  #define KEY_FEATURE_LAYER "clip.vision.feature_layer"
32
32
  #define KEY_PROJ_SCALE_FACTOR "clip.vision.projector.scale_factor"
33
33
  #define KEY_PROJ_TYPE "clip.projector_type"
34
-
35
- #define KEY_USE_GLU_MLP "clip.use_glu_mlp" // for qwen2.5vl
36
- #define KEY_USE_RMS_NORM "clip.use_rms_norm" // for qwen2.5vl
34
+ #define KEY_SPATIAL_MERGE_SIZE "clip.vision.spatial_merge_size"
37
35
 
38
36
  #define KEY_MM_PATCH_MERGE_TYPE "clip.vision.mm_patch_merge_type"
39
37
  #define KEY_IMAGE_GRID_PINPOINTS "clip.vision.image_grid_pinpoints"
@@ -55,12 +53,16 @@
55
53
  #define TN_ATTN_Q "%s.blk.%d.attn_q.%s"
56
54
  #define TN_ATTN_V "%s.blk.%d.attn_v.%s"
57
55
  #define TN_ATTN_OUTPUT "%s.blk.%d.attn_out.%s"
56
+ #define TN_ATTN_K_NORM "%s.blk.%d.attn_k_norm.%s"
57
+ #define TN_ATTN_Q_NORM "%s.blk.%d.attn_q_norm.%s"
58
58
  #define TN_FFN_DOWN "%s.blk.%d.ffn_down.%s"
59
59
  #define TN_FFN_GATE "%s.blk.%d.ffn_gate.%s"
60
60
  #define TN_FFN_UP "%s.blk.%d.ffn_up.%s"
61
61
  #define TN_FFN_GATE "%s.blk.%d.ffn_gate.%s"
62
- #define TN_LN_1 "%s.blk.%d.ln1.%s"
63
- #define TN_LN_2 "%s.blk.%d.ln2.%s"
62
+ #define TN_LN_1 "%s.blk.%d.ln1.%s" // layer norm
63
+ #define TN_LN_2 "%s.blk.%d.ln2.%s" // layer norm
64
+ #define TN_LS_1 "%s.blk.%d.ls1.%s" // layer scale
65
+ #define TN_LS_2 "%s.blk.%d.ls2.%s" // layer scale
64
66
  #define TN_LN_PRE "%s.pre_ln.%s"
65
67
  #define TN_LN_POST "%s.post_ln.%s"
66
68
  #define TN_LLAVA_PROJ "mm.%d.%s"
@@ -68,10 +70,14 @@
68
70
  #define TN_MVLM_PROJ_BLOCK "mm.model.mb_block.%d.block.%d.%s"
69
71
  #define TN_MVLM_PROJ_PEG "mm.model.peg.%d.%s"
70
72
  #define TN_IMAGE_NEWLINE "model.image_newline"
73
+ #define TN_MM_INP_NORM "mm.input_norm.weight"
71
74
  #define TN_MM_INP_PROJ "mm.input_projection.weight" // gemma3
72
75
  #define TN_MM_SOFT_EMB_N "mm.soft_emb_norm.weight" // gemma3
73
76
  #define TN_MM_PROJECTOR "mm.model.fc.weight" // idefics3
77
+ #define TN_MM_PATCH_MERGER "mm.patch_merger.weight" // mistral small 3.1
74
78
  #define TN_TOK_IMG_BREAK "v.token_embd.img_break" // pixtral
79
+ #define TN_TOK_GLM_BOI "adapter.boi" // glm-edge (these embeddings are not in text model)
80
+ #define TN_TOK_GLM_EOI "adapter.eoi" // glm-edge (these embeddings are not in text model)
75
81
 
76
82
  // mimicpmv
77
83
  #define TN_MINICPMV_POS_EMBD_K "resampler.pos_embed_k"
@@ -88,6 +94,9 @@
88
94
  #define TN_GLM_ADAPTER_GATE "adapter.linear.gate.%s"
89
95
  #define TN_GLM_ADAPTER_D_4H_2_H "adapter.linear.dense_4h_to_h.%s"
90
96
 
97
+ // align x to upper multiple of n
98
+ #define CLIP_ALIGN(x, n) ((((x) + (n) - 1) / (n)) * (n))
99
+
91
100
  enum projector_type {
92
101
  PROJECTOR_TYPE_MLP,
93
102
  PROJECTOR_TYPE_MLP_NORM,
@@ -100,6 +109,7 @@ enum projector_type {
100
109
  PROJECTOR_TYPE_IDEFICS3,
101
110
  PROJECTOR_TYPE_PIXTRAL,
102
111
  PROJECTOR_TYPE_QWEN25VL,
112
+ PROJECTOR_TYPE_INTERNVL,
103
113
  PROJECTOR_TYPE_UNKNOWN,
104
114
  };
105
115
 
@@ -114,6 +124,7 @@ static std::map<projector_type, std::string> PROJECTOR_TYPE_NAMES = {
114
124
  { PROJECTOR_TYPE_GEMMA3, "gemma3"},
115
125
  { PROJECTOR_TYPE_IDEFICS3, "idefics3"},
116
126
  { PROJECTOR_TYPE_PIXTRAL, "pixtral"},
127
+ { PROJECTOR_TYPE_INTERNVL, "internvl"},
117
128
  };
118
129
 
119
130
  static projector_type clip_projector_type_from_string(const std::string & str) {
@@ -228,6 +239,15 @@ struct clip_image_u8_batch {
228
239
 
229
240
  struct clip_image_f32_batch {
230
241
  std::vector<clip_image_f32_ptr> entries;
242
+
243
+ clip_image_f32_batch clone() const {
244
+ clip_image_f32_batch new_batch;
245
+ new_batch.entries.reserve(entries.size());
246
+ for (const auto & entry : entries) {
247
+ new_batch.entries.emplace_back(new clip_image_f32(*entry));
248
+ }
249
+ return new_batch;
250
+ }
231
251
  };
232
252
 
233
253
  //