@fugood/llama.node 0.3.17 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +3 -1
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +39 -2
- package/lib/index.js +132 -1
- package/lib/index.ts +203 -3
- package/package.json +2 -1
- package/src/EmbeddingWorker.cpp +1 -1
- package/src/LlamaCompletionWorker.cpp +366 -19
- package/src/LlamaCompletionWorker.h +30 -10
- package/src/LlamaContext.cpp +213 -5
- package/src/LlamaContext.h +12 -0
- package/src/common.hpp +15 -0
- package/src/llama.cpp/.github/workflows/build-linux-cross.yml +133 -24
- package/src/llama.cpp/.github/workflows/build.yml +41 -762
- package/src/llama.cpp/.github/workflows/docker.yml +5 -2
- package/src/llama.cpp/.github/workflows/release.yml +716 -0
- package/src/llama.cpp/.github/workflows/server.yml +12 -12
- package/src/llama.cpp/CMakeLists.txt +5 -17
- package/src/llama.cpp/cmake/build-info.cmake +8 -2
- package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
- package/src/llama.cpp/common/CMakeLists.txt +31 -3
- package/src/llama.cpp/common/arg.cpp +48 -29
- package/src/llama.cpp/common/chat.cpp +128 -106
- package/src/llama.cpp/common/chat.h +2 -0
- package/src/llama.cpp/common/common.cpp +37 -1
- package/src/llama.cpp/common/common.h +18 -9
- package/src/llama.cpp/common/llguidance.cpp +1 -0
- package/src/llama.cpp/common/minja/chat-template.hpp +9 -5
- package/src/llama.cpp/common/minja/minja.hpp +69 -36
- package/src/llama.cpp/common/regex-partial.cpp +204 -0
- package/src/llama.cpp/common/regex-partial.h +56 -0
- package/src/llama.cpp/common/sampling.cpp +57 -50
- package/src/llama.cpp/examples/CMakeLists.txt +2 -23
- package/src/llama.cpp/examples/embedding/embedding.cpp +2 -11
- package/src/llama.cpp/examples/parallel/parallel.cpp +86 -14
- package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/training/finetune.cpp +96 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +27 -0
- package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
- package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
- package/src/llama.cpp/ggml/include/ggml.h +10 -7
- package/src/llama.cpp/ggml/src/CMakeLists.txt +1 -1
- package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +20 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +0 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +306 -6
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +4 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +29 -16
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +88 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -12
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +264 -69
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +501 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +0 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +0 -6
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +36 -11
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +0 -2
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
- package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +41 -27
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +29 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +9 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +121 -232
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +7 -15
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +0 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +338 -166
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
- package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +81 -70
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +657 -193
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +20 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +123 -29
- package/src/llama.cpp/ggml/src/ggml.c +29 -20
- package/src/llama.cpp/ggml/src/gguf.cpp +33 -33
- package/src/llama.cpp/include/llama.h +52 -11
- package/src/llama.cpp/requirements/requirements-all.txt +3 -3
- package/src/llama.cpp/scripts/xxd.cmake +1 -1
- package/src/llama.cpp/src/CMakeLists.txt +1 -0
- package/src/llama.cpp/src/llama-adapter.cpp +6 -0
- package/src/llama.cpp/src/llama-arch.cpp +3 -0
- package/src/llama.cpp/src/llama-batch.cpp +5 -1
- package/src/llama.cpp/src/llama-batch.h +2 -1
- package/src/llama.cpp/src/llama-chat.cpp +17 -7
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-context.cpp +389 -501
- package/src/llama.cpp/src/llama-context.h +44 -32
- package/src/llama.cpp/src/llama-cparams.h +1 -0
- package/src/llama.cpp/src/llama-graph.cpp +20 -38
- package/src/llama.cpp/src/llama-graph.h +12 -8
- package/src/llama.cpp/src/llama-kv-cache.cpp +1503 -389
- package/src/llama.cpp/src/llama-kv-cache.h +271 -85
- package/src/llama.cpp/src/llama-memory.h +11 -1
- package/src/llama.cpp/src/llama-model-loader.cpp +24 -15
- package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
- package/src/llama.cpp/src/llama-model-saver.h +37 -0
- package/src/llama.cpp/src/llama-model.cpp +316 -69
- package/src/llama.cpp/src/llama-model.h +8 -1
- package/src/llama.cpp/src/llama-quant.cpp +15 -13
- package/src/llama.cpp/src/llama-sampling.cpp +18 -6
- package/src/llama.cpp/src/llama-vocab.cpp +42 -4
- package/src/llama.cpp/src/llama-vocab.h +6 -0
- package/src/llama.cpp/src/llama.cpp +14 -0
- package/src/llama.cpp/tests/CMakeLists.txt +10 -2
- package/src/llama.cpp/tests/test-backend-ops.cpp +107 -47
- package/src/llama.cpp/tests/test-chat-template.cpp +10 -11
- package/src/llama.cpp/tests/test-chat.cpp +3 -1
- package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
- package/src/llama.cpp/tests/test-opt.cpp +33 -21
- package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
- package/src/llama.cpp/tests/test-sampling.cpp +1 -1
- package/src/llama.cpp/tools/CMakeLists.txt +39 -0
- package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +2 -2
- package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
- package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +495 -348
- package/src/llama.cpp/{examples → tools}/main/main.cpp +6 -9
- package/src/llama.cpp/{examples/llava → tools/mtmd}/CMakeLists.txt +1 -35
- package/src/llama.cpp/{examples/llava → tools/mtmd}/clip-impl.h +25 -5
- package/src/llama.cpp/{examples/llava → tools/mtmd}/clip.cpp +1440 -1349
- package/src/llama.cpp/tools/mtmd/clip.h +99 -0
- package/src/llama.cpp/{examples/llava → tools/mtmd}/mtmd-cli.cpp +70 -44
- package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
- package/src/llama.cpp/{examples/llava → tools/mtmd}/mtmd.cpp +251 -281
- package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
- package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +4 -2
- package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +13 -76
- package/src/llama.cpp/{examples → tools}/rpc/rpc-server.cpp +70 -74
- package/src/llama.cpp/{examples → tools}/run/run.cpp +18 -4
- package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
- package/src/llama.cpp/{examples → tools}/server/server.cpp +291 -76
- package/src/llama.cpp/{examples → tools}/server/utils.hpp +377 -5
- package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
- package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/infill/infill.cpp +0 -590
- package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
- package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
- package/src/llama.cpp/examples/llava/clip.h +0 -135
- package/src/llama.cpp/examples/llava/llava.cpp +0 -586
- package/src/llama.cpp/examples/llava/llava.h +0 -49
- package/src/llama.cpp/examples/llava/mtmd.h +0 -168
- package/src/llama.cpp/examples/llava/qwen2vl-test.cpp +0 -636
- /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples/llava → tools/mtmd}/deprecation-warning.cpp +0 -0
- /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/rpc/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/run/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
- /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/server/httplib.h +0 -0
- /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tts/tts.cpp +0 -0
|
@@ -99,14 +99,6 @@ int main(int argc, char ** argv) {
|
|
|
99
99
|
console::init(params.simple_io, params.use_color);
|
|
100
100
|
atexit([]() { console::cleanup(); });
|
|
101
101
|
|
|
102
|
-
if (params.logits_all) {
|
|
103
|
-
LOG_ERR("************\n");
|
|
104
|
-
LOG_ERR("%s: please use the 'perplexity' tool for perplexity calculations\n", __func__);
|
|
105
|
-
LOG_ERR("************\n\n");
|
|
106
|
-
|
|
107
|
-
return 0;
|
|
108
|
-
}
|
|
109
|
-
|
|
110
102
|
if (params.embedding) {
|
|
111
103
|
LOG_ERR("************\n");
|
|
112
104
|
LOG_ERR("%s: please use the 'embedding' tool for embedding calculations\n", __func__);
|
|
@@ -160,7 +152,12 @@ int main(int argc, char ** argv) {
|
|
|
160
152
|
|
|
161
153
|
LOG_INF("%s: llama threadpool init, n_threads = %d\n", __func__, (int) params.cpuparams.n_threads);
|
|
162
154
|
|
|
163
|
-
auto * reg = ggml_backend_dev_backend_reg(ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU));
|
|
155
|
+
auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
|
|
156
|
+
if (!cpu_dev) {
|
|
157
|
+
LOG_ERR("%s: no CPU backend found\n", __func__);
|
|
158
|
+
return 1;
|
|
159
|
+
}
|
|
160
|
+
auto * reg = ggml_backend_dev_backend_reg(cpu_dev);
|
|
164
161
|
auto * ggml_threadpool_new_fn = (decltype(ggml_threadpool_new) *) ggml_backend_reg_get_proc_address(reg, "ggml_threadpool_new");
|
|
165
162
|
auto * ggml_threadpool_free_fn = (decltype(ggml_threadpool_free) *) ggml_backend_reg_get_proc_address(reg, "ggml_threadpool_free");
|
|
166
163
|
|
|
@@ -1,33 +1,8 @@
|
|
|
1
|
-
# llava (legacy)
|
|
2
|
-
|
|
3
|
-
add_library(llava OBJECT
|
|
4
|
-
llava.cpp
|
|
5
|
-
llava.h
|
|
6
|
-
clip.cpp
|
|
7
|
-
clip.h
|
|
8
|
-
)
|
|
9
|
-
|
|
10
|
-
target_link_libraries(llava PRIVATE ggml llama ${CMAKE_THREAD_LIBS_INIT})
|
|
11
|
-
|
|
12
|
-
target_include_directories(llava PUBLIC .)
|
|
13
|
-
target_include_directories(llava PUBLIC ../..)
|
|
14
|
-
target_include_directories(llava PUBLIC ../../common)
|
|
15
|
-
|
|
16
|
-
target_compile_features(llava PRIVATE cxx_std_17)
|
|
17
|
-
|
|
18
|
-
add_library(llava_static STATIC $<TARGET_OBJECTS:llava>)
|
|
19
|
-
if (BUILD_SHARED_LIBS)
|
|
20
|
-
set_target_properties(llava PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
|
21
|
-
target_compile_definitions(llava PRIVATE LLAMA_SHARED LLAMA_BUILD)
|
|
22
|
-
add_library(llava_shared SHARED $<TARGET_OBJECTS:llava>)
|
|
23
|
-
target_link_libraries(llava_shared PRIVATE ggml llama ${CMAKE_THREAD_LIBS_INIT})
|
|
24
|
-
install(TARGETS llava_shared LIBRARY)
|
|
25
|
-
endif()
|
|
26
|
-
|
|
27
1
|
# mtmd
|
|
28
2
|
|
|
29
3
|
add_library(mtmd OBJECT
|
|
30
4
|
mtmd.cpp
|
|
5
|
+
mtmd-helper.cpp
|
|
31
6
|
mtmd.h
|
|
32
7
|
clip.cpp
|
|
33
8
|
clip.h
|
|
@@ -52,12 +27,10 @@ if (BUILD_SHARED_LIBS)
|
|
|
52
27
|
endif()
|
|
53
28
|
|
|
54
29
|
if (NOT MSVC)
|
|
55
|
-
target_compile_options(llava PRIVATE -Wno-cast-qual) # stb_image.h
|
|
56
30
|
target_compile_options(mtmd PRIVATE -Wno-cast-qual) # stb_image.h
|
|
57
31
|
endif()
|
|
58
32
|
|
|
59
33
|
if(TARGET BUILD_INFO)
|
|
60
|
-
add_dependencies(llava BUILD_INFO)
|
|
61
34
|
add_dependencies(mtmd BUILD_INFO)
|
|
62
35
|
endif()
|
|
63
36
|
|
|
@@ -72,10 +45,3 @@ set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama-mtmd-cli)
|
|
|
72
45
|
install(TARGETS ${TARGET} RUNTIME)
|
|
73
46
|
target_link_libraries(${TARGET} PRIVATE common mtmd ${CMAKE_THREAD_LIBS_INIT})
|
|
74
47
|
target_compile_features(${TARGET} PRIVATE cxx_std_17)
|
|
75
|
-
|
|
76
|
-
set(TARGET llama-llava-clip-quantize-cli)
|
|
77
|
-
add_executable(${TARGET} clip-quantize-cli.cpp)
|
|
78
|
-
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama-llava-clip-quantize-cli)
|
|
79
|
-
install(TARGETS ${TARGET} RUNTIME)
|
|
80
|
-
target_link_libraries(${TARGET} PRIVATE common llava ${CMAKE_THREAD_LIBS_INIT})
|
|
81
|
-
target_compile_features(${TARGET} PRIVATE cxx_std_17)
|
|
@@ -31,9 +31,7 @@
|
|
|
31
31
|
#define KEY_FEATURE_LAYER "clip.vision.feature_layer"
|
|
32
32
|
#define KEY_PROJ_SCALE_FACTOR "clip.vision.projector.scale_factor"
|
|
33
33
|
#define KEY_PROJ_TYPE "clip.projector_type"
|
|
34
|
-
|
|
35
|
-
#define KEY_USE_GLU_MLP "clip.use_glu_mlp" // for qwen2.5vl
|
|
36
|
-
#define KEY_USE_RMS_NORM "clip.use_rms_norm" // for qwen2.5vl
|
|
34
|
+
#define KEY_SPATIAL_MERGE_SIZE "clip.vision.spatial_merge_size"
|
|
37
35
|
|
|
38
36
|
#define KEY_MM_PATCH_MERGE_TYPE "clip.vision.mm_patch_merge_type"
|
|
39
37
|
#define KEY_IMAGE_GRID_PINPOINTS "clip.vision.image_grid_pinpoints"
|
|
@@ -55,12 +53,16 @@
|
|
|
55
53
|
#define TN_ATTN_Q "%s.blk.%d.attn_q.%s"
|
|
56
54
|
#define TN_ATTN_V "%s.blk.%d.attn_v.%s"
|
|
57
55
|
#define TN_ATTN_OUTPUT "%s.blk.%d.attn_out.%s"
|
|
56
|
+
#define TN_ATTN_K_NORM "%s.blk.%d.attn_k_norm.%s"
|
|
57
|
+
#define TN_ATTN_Q_NORM "%s.blk.%d.attn_q_norm.%s"
|
|
58
58
|
#define TN_FFN_DOWN "%s.blk.%d.ffn_down.%s"
|
|
59
59
|
#define TN_FFN_GATE "%s.blk.%d.ffn_gate.%s"
|
|
60
60
|
#define TN_FFN_UP "%s.blk.%d.ffn_up.%s"
|
|
61
61
|
#define TN_FFN_GATE "%s.blk.%d.ffn_gate.%s"
|
|
62
|
-
#define TN_LN_1 "%s.blk.%d.ln1.%s"
|
|
63
|
-
#define TN_LN_2 "%s.blk.%d.ln2.%s"
|
|
62
|
+
#define TN_LN_1 "%s.blk.%d.ln1.%s" // layer norm
|
|
63
|
+
#define TN_LN_2 "%s.blk.%d.ln2.%s" // layer norm
|
|
64
|
+
#define TN_LS_1 "%s.blk.%d.ls1.%s" // layer scale
|
|
65
|
+
#define TN_LS_2 "%s.blk.%d.ls2.%s" // layer scale
|
|
64
66
|
#define TN_LN_PRE "%s.pre_ln.%s"
|
|
65
67
|
#define TN_LN_POST "%s.post_ln.%s"
|
|
66
68
|
#define TN_LLAVA_PROJ "mm.%d.%s"
|
|
@@ -68,10 +70,14 @@
|
|
|
68
70
|
#define TN_MVLM_PROJ_BLOCK "mm.model.mb_block.%d.block.%d.%s"
|
|
69
71
|
#define TN_MVLM_PROJ_PEG "mm.model.peg.%d.%s"
|
|
70
72
|
#define TN_IMAGE_NEWLINE "model.image_newline"
|
|
73
|
+
#define TN_MM_INP_NORM "mm.input_norm.weight"
|
|
71
74
|
#define TN_MM_INP_PROJ "mm.input_projection.weight" // gemma3
|
|
72
75
|
#define TN_MM_SOFT_EMB_N "mm.soft_emb_norm.weight" // gemma3
|
|
73
76
|
#define TN_MM_PROJECTOR "mm.model.fc.weight" // idefics3
|
|
77
|
+
#define TN_MM_PATCH_MERGER "mm.patch_merger.weight" // mistral small 3.1
|
|
74
78
|
#define TN_TOK_IMG_BREAK "v.token_embd.img_break" // pixtral
|
|
79
|
+
#define TN_TOK_GLM_BOI "adapter.boi" // glm-edge (these embeddings are not in text model)
|
|
80
|
+
#define TN_TOK_GLM_EOI "adapter.eoi" // glm-edge (these embeddings are not in text model)
|
|
75
81
|
|
|
76
82
|
// mimicpmv
|
|
77
83
|
#define TN_MINICPMV_POS_EMBD_K "resampler.pos_embed_k"
|
|
@@ -88,6 +94,9 @@
|
|
|
88
94
|
#define TN_GLM_ADAPTER_GATE "adapter.linear.gate.%s"
|
|
89
95
|
#define TN_GLM_ADAPTER_D_4H_2_H "adapter.linear.dense_4h_to_h.%s"
|
|
90
96
|
|
|
97
|
+
// align x to upper multiple of n
|
|
98
|
+
#define CLIP_ALIGN(x, n) ((((x) + (n) - 1) / (n)) * (n))
|
|
99
|
+
|
|
91
100
|
enum projector_type {
|
|
92
101
|
PROJECTOR_TYPE_MLP,
|
|
93
102
|
PROJECTOR_TYPE_MLP_NORM,
|
|
@@ -100,6 +109,7 @@ enum projector_type {
|
|
|
100
109
|
PROJECTOR_TYPE_IDEFICS3,
|
|
101
110
|
PROJECTOR_TYPE_PIXTRAL,
|
|
102
111
|
PROJECTOR_TYPE_QWEN25VL,
|
|
112
|
+
PROJECTOR_TYPE_INTERNVL,
|
|
103
113
|
PROJECTOR_TYPE_UNKNOWN,
|
|
104
114
|
};
|
|
105
115
|
|
|
@@ -114,6 +124,7 @@ static std::map<projector_type, std::string> PROJECTOR_TYPE_NAMES = {
|
|
|
114
124
|
{ PROJECTOR_TYPE_GEMMA3, "gemma3"},
|
|
115
125
|
{ PROJECTOR_TYPE_IDEFICS3, "idefics3"},
|
|
116
126
|
{ PROJECTOR_TYPE_PIXTRAL, "pixtral"},
|
|
127
|
+
{ PROJECTOR_TYPE_INTERNVL, "internvl"},
|
|
117
128
|
};
|
|
118
129
|
|
|
119
130
|
static projector_type clip_projector_type_from_string(const std::string & str) {
|
|
@@ -228,6 +239,15 @@ struct clip_image_u8_batch {
|
|
|
228
239
|
|
|
229
240
|
struct clip_image_f32_batch {
|
|
230
241
|
std::vector<clip_image_f32_ptr> entries;
|
|
242
|
+
|
|
243
|
+
clip_image_f32_batch clone() const {
|
|
244
|
+
clip_image_f32_batch new_batch;
|
|
245
|
+
new_batch.entries.reserve(entries.size());
|
|
246
|
+
for (const auto & entry : entries) {
|
|
247
|
+
new_batch.entries.emplace_back(new clip_image_f32(*entry));
|
|
248
|
+
}
|
|
249
|
+
return new_batch;
|
|
250
|
+
}
|
|
231
251
|
};
|
|
232
252
|
|
|
233
253
|
//
|