cactus-react-native 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.txt +20 -0
- package/README.md +3 -1
- package/android/src/main/CMakeLists.txt +60 -21
- package/android/src/main/java/com/cactus/Cactus.java +465 -0
- package/android/src/main/java/com/cactus/LlamaContext.java +199 -0
- package/android/src/main/jni.cpp +325 -10
- package/android/src/main/jniLibs/arm64-v8a/libcactus.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libcactus_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/libcactus.so +0 -0
- package/android/src/main/jniLibs/x86_64/libcactus_x86_64.so +0 -0
- package/android/src/newarch/java/com/cactus/CactusModule.java +79 -7
- package/android/src/oldarch/java/com/cactus/CactusModule.java +70 -0
- package/cactus-react-native.podspec +0 -3
- package/ios/CMakeLists.txt +56 -36
- package/ios/Cactus.mm +243 -2
- package/ios/CactusContext.h +22 -0
- package/ios/CactusContext.mm +176 -1
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus.h +92 -5
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_ffi.h +229 -0
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/chat.h +2 -0
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/common.h +42 -51
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-backend.h +4 -4
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-common.h +12 -6
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-cpp.h +1 -1
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-cpu.h +5 -0
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-impl.h +52 -18
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-metal-impl.h +106 -14
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-opt.h +49 -28
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml.h +87 -106
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-arch.h +16 -0
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-batch.h +2 -1
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-chat.h +7 -2
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-context.h +44 -33
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-cparams.h +1 -0
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-graph.h +83 -17
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-hparams.h +44 -2
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-kv-cache.h +407 -179
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-memory.h +13 -2
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-model-loader.h +5 -3
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-model-saver.h +37 -0
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-model.h +24 -2
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-vocab.h +6 -0
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama.h +102 -142
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/minja/chat-template.hpp +23 -11
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/minja/minja.hpp +186 -127
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Info.plist +0 -0
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/cactus +0 -0
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/ggml-llama.metallib +0 -0
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/cactus.h +92 -5
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/cactus_ffi.h +229 -0
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/chat.h +2 -0
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/common.h +42 -51
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-backend.h +4 -4
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-common.h +12 -6
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpp.h +1 -1
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpu.h +5 -0
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-impl.h +52 -18
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-metal-impl.h +106 -14
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-opt.h +49 -28
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml.h +87 -106
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-arch.h +16 -0
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-batch.h +2 -1
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-chat.h +7 -2
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-context.h +44 -33
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-cparams.h +1 -0
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-graph.h +83 -17
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-hparams.h +44 -2
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-kv-cache.h +407 -179
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-memory.h +13 -2
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-model-loader.h +5 -3
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-model-saver.h +37 -0
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-model.h +24 -2
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-vocab.h +6 -0
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama.h +102 -142
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/minja/chat-template.hpp +23 -11
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/minja/minja.hpp +186 -127
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Info.plist +0 -0
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/_CodeSignature/CodeResources +1 -1
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/cactus +0 -0
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/ggml-llama-sim.metallib +0 -0
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/cactus.h +92 -5
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/cactus_ffi.h +229 -0
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/chat.h +2 -0
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/common.h +42 -51
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-backend.h +4 -4
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-common.h +12 -6
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-cpp.h +1 -1
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-cpu.h +5 -0
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-impl.h +52 -18
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-metal-impl.h +106 -14
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-opt.h +49 -28
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml.h +87 -106
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-arch.h +16 -0
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-batch.h +2 -1
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-chat.h +7 -2
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-context.h +44 -33
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-cparams.h +1 -0
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-graph.h +83 -17
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-hparams.h +44 -2
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-kv-cache.h +407 -179
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-memory.h +13 -2
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-model-loader.h +5 -3
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-model-saver.h +37 -0
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-model.h +24 -2
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-vocab.h +6 -0
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama.h +102 -142
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/minja/chat-template.hpp +23 -11
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/minja/minja.hpp +186 -127
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Info.plist +0 -0
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/cactus +0 -0
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/ggml-llama.metallib +0 -0
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/cactus.h +92 -5
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/cactus_ffi.h +229 -0
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/chat.h +2 -0
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/common.h +42 -51
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-backend.h +4 -4
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-common.h +12 -6
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpp.h +1 -1
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpu.h +5 -0
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-impl.h +52 -18
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-metal-impl.h +106 -14
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-opt.h +49 -28
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml.h +87 -106
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-arch.h +16 -0
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-batch.h +2 -1
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-chat.h +7 -2
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-context.h +44 -33
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-cparams.h +1 -0
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-graph.h +83 -17
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-hparams.h +44 -2
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-kv-cache.h +407 -179
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-memory.h +13 -2
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-model-loader.h +5 -3
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-model-saver.h +37 -0
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-model.h +24 -2
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-vocab.h +6 -0
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama.h +102 -142
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/minja/chat-template.hpp +23 -11
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/minja/minja.hpp +186 -127
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Info.plist +0 -0
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/_CodeSignature/CodeResources +1 -1
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/cactus +0 -0
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/ggml-llama-sim.metallib +0 -0
- package/lib/commonjs/NativeCactus.js +1 -0
- package/lib/commonjs/NativeCactus.js.map +1 -1
- package/lib/commonjs/index.js +112 -0
- package/lib/commonjs/index.js.map +1 -1
- package/lib/commonjs/tools.js +118 -0
- package/lib/commonjs/tools.js.map +1 -0
- package/lib/module/NativeCactus.js +3 -0
- package/lib/module/NativeCactus.js.map +1 -1
- package/lib/module/index.js +87 -1
- package/lib/module/index.js.map +1 -1
- package/lib/module/tools.js +110 -0
- package/lib/module/tools.js.map +1 -0
- package/lib/typescript/NativeCactus.d.ts +30 -1
- package/lib/typescript/NativeCactus.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +21 -2
- package/lib/typescript/index.d.ts.map +1 -1
- package/lib/typescript/tools.d.ts +38 -0
- package/lib/typescript/tools.d.ts.map +1 -0
- package/package.json +6 -3
- package/src/NativeCactus.ts +62 -1
- package/src/index.ts +113 -2
- package/src/tools.ts +127 -0
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-cpu-impl.h +0 -531
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/sgemm.h +0 -14
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpu-impl.h +0 -531
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/sgemm.h +0 -14
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-cpu-impl.h +0 -531
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/sgemm.h +0 -14
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpu-impl.h +0 -531
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/sgemm.h +0 -14
Binary files (the jniLibs *.so libraries, the cactus framework binaries, Info.plist and *.metallib files) are not displayed in this diff.
cactus.framework/Headers/cactus.h (+92 -5 in each xcframework slice)

@@ -14,6 +14,8 @@
 #include <android/log.h>
 #endif
 
+struct mtmd_context;
+
 namespace cactus {
 
 std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token);
@@ -28,7 +30,12 @@ enum stop_type
     STOP_PARTIAL,
 };
 
-
+enum tts_type {
+    TTS_UNKNOWN = -1,
+    TTS_OUTETTS_V0_2 = 1,
+    TTS_OUTETTS_V0_3 = 2,
+};
+
 struct completion_token_output
 {
     struct token_prob
@@ -41,7 +48,14 @@ struct completion_token_output
     llama_token tok;
 };
 
-
+struct cactus_tokenize_result {
+    std::vector<llama_token> tokens;
+    bool has_media = false;
+    std::vector<std::string> bitmap_hashes;
+    std::vector<size_t> chunk_pos;
+    std::vector<size_t> chunk_pos_media;
+};
+
 struct cactus_context {
     bool is_predicting = false;
     bool is_interrupted = false;
@@ -77,12 +91,37 @@ struct cactus_context {
 
     std::vector<common_adapter_lora_info> lora;
 
+    bool context_full = false;
+    std::vector<llama_token> guide_tokens;
+    bool next_token_uses_guide_token = true;
+
+    struct cactus_context_mtmd {
+        mtmd_context* mtmd_ctx = nullptr;
+    };
+    cactus_context_mtmd *mtmd_wrapper = nullptr;
+    bool has_multimodal = false;
+    std::vector<std::string> mtmd_bitmap_past_hashes;
+
+    struct cactus_context_vocoder {
+        common_init_result init_result;
+        llama_model *model = nullptr;
+        llama_context *ctx = nullptr;
+        tts_type type = TTS_UNKNOWN;
+    };
+    cactus_context_vocoder *vocoder_wrapper = nullptr;
+    bool has_vocoder = false;
+    std::vector<llama_token> audio_tokens;
+
     ~cactus_context();
 
     void rewind();
+
     bool initSampling();
+
     bool loadModel(common_params &params_);
+
     bool validateModelChatTemplate(bool use_jinja, const char *name) const;
+
     common_chat_params getFormattedChatWithJinja(
         const std::string &messages,
         const std::string &chat_template,
@@ -91,24 +130,58 @@ struct cactus_context {
         const bool &parallel_tool_calls,
         const std::string &tool_choice
     ) const;
+
     std::string getFormattedChat(
         const std::string &messages,
         const std::string &chat_template
     ) const;
+
     void truncatePrompt(std::vector<llama_token> &prompt_tokens);
+
     void loadPrompt();
+
+    void loadPrompt(const std::vector<std::string> &media_paths);
+
+    void setGuideTokens(const std::vector<llama_token> &tokens);
+
     void beginCompletion();
+
+    void endCompletion();
+
     completion_token_output nextToken();
+
     size_t findStoppingStrings(const std::string &text, const size_t last_token_size, const stop_type type);
+
     completion_token_output doCompletion();
+
     std::vector<float> getEmbedding(common_params &embd_params);
+
     std::string bench(int pp, int tg, int pl, int nr);
+
     int applyLoraAdapters(std::vector<common_adapter_lora_info> lora);
+
     void removeLoraAdapters();
+
     std::vector<common_adapter_lora_info> getLoadedLoraAdapters();
-};
 
-
+    cactus_tokenize_result tokenize(const std::string &text, const std::vector<std::string> &media_paths);
+
+    bool initMultimodal(const std::string &mmproj_path, bool use_gpu);
+    bool isMultimodalEnabled() const;
+    bool isMultimodalSupportVision() const;
+    bool isMultimodalSupportAudio() const;
+    void releaseMultimodal();
+    void processMedia(const std::string &prompt, const std::vector<std::string> &media_paths);
+
+    bool initVocoder(const std::string &vocoder_model_path);
+    bool isVocoderEnabled() const;
+    tts_type getTTSType() const;
+    std::string getFormattedAudioCompletion(const std::string &speaker_json_str, const std::string &text_to_speak);
+    std::vector<llama_token> getAudioCompletionGuideTokens(const std::string &text_to_speak);
+    std::vector<float> decodeAudioTokens(const std::vector<llama_token> &tokens);
+    void releaseVocoder();
+};
+
 extern bool cactus_verbose;
 
 #if CACTUS_VERBOSE != 1
@@ -125,9 +198,23 @@ extern bool cactus_verbose;
 #endif
 
 #define LOG_ERROR(MSG, ...) log("ERROR", __func__, __LINE__, MSG, ##__VA_ARGS__)
+
 #define LOG_WARNING(MSG, ...) log("WARNING", __func__, __LINE__, MSG, ##__VA_ARGS__)
+
 #define LOG_INFO(MSG, ...) log("INFO", __func__, __LINE__, MSG, ##__VA_ARGS__)
 
+void log(const char *level, const char *function, int line, const char *format, ...);
+
+void llama_batch_clear(llama_batch *batch);
+
+void llama_batch_add(llama_batch *batch, llama_token id, llama_pos pos, const std::vector<llama_seq_id>& seq_ids, bool logits);
+
+size_t common_part(const std::vector<llama_token> &a, const std::vector<llama_token> &b);
+
+bool ends_with(const std::string &str, const std::string &suffix);
+
+size_t find_partial_stop_string(const std::string &stop, const std::string &text);
+
 } // namespace cactus
 
-#endif /* CACTUS_H */
+#endif /* CACTUS_H */
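The cactus.h additions above give cactus_context a multimodal (mtmd) path and an OuteTTS-style vocoder path. The following is a minimal C++ sketch, not shipped with the package, of how a caller might drive the new members; it uses only methods declared in this header, assumes loadModel() has already succeeded, and all file paths are placeholders.

#include <string>
#include <vector>
#include "cactus.h"

// Sketch only: exercise the new multimodal and vocoder APIs of cactus::cactus_context.
void demo_new_apis(cactus::cactus_context &ctx) {
    // Multimodal: attach an mmproj model, then run a prompt that references media.
    if (ctx.initMultimodal("/path/to/mmproj.gguf", /*use_gpu=*/true) &&   // placeholder path
        ctx.isMultimodalSupportVision()) {
        std::vector<std::string> media = {"/path/to/photo.jpg"};          // placeholder path
        ctx.loadPrompt(media);      // new overload that evaluates prompt + media chunks
        ctx.beginCompletion();
        for (int i = 0; i < 64 && !ctx.is_interrupted && !ctx.context_full; ++i) {
            ctx.doCompletion();     // yields one completion_token_output per call
        }
        ctx.endCompletion();        // new explicit end-of-completion hook
        ctx.releaseMultimodal();
    }

    // TTS: attach a vocoder model, guide decoding, then decode audio tokens to PCM.
    if (ctx.initVocoder("/path/to/vocoder.gguf") && ctx.isVocoderEnabled()) {   // placeholder path
        std::string tts_prompt = ctx.getFormattedAudioCompletion("{}", "Hello from cactus!");
        ctx.setGuideTokens(ctx.getAudioCompletionGuideTokens("Hello from cactus!"));
        // ... run a completion over tts_prompt; generated audio tokens accumulate in ctx.audio_tokens ...
        std::vector<float> pcm = ctx.decodeAudioTokens(ctx.audio_tokens);
        ctx.releaseVocoder();
    }
}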
package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/cactus_ffi.h
ADDED (+229; the same header is added in each xcframework slice)

@@ -0,0 +1,229 @@
+#ifndef CACTUS_FFI_H
+#define CACTUS_FFI_H
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <stddef.h>
+
+#if defined _WIN32 || defined __CYGWIN__
+  #ifdef CACTUS_FFI_BUILDING_DLL
+    #ifdef __GNUC__
+      #define CACTUS_FFI_EXPORT __attribute__ ((dllexport))
+    #else
+      #define CACTUS_FFI_EXPORT __declspec(dllexport)
+    #endif
+  #else
+    #ifdef __GNUC__
+      #define CACTUS_FFI_EXPORT __attribute__ ((dllimport))
+    #else
+      #define CACTUS_FFI_EXPORT __declspec(dllimport)
+    #endif
+  #endif
+  #define CACTUS_FFI_LOCAL
+#else
+  #if __GNUC__ >= 4
+    #define CACTUS_FFI_EXPORT __attribute__ ((visibility ("default")))
+    #define CACTUS_FFI_LOCAL __attribute__ ((visibility ("hidden")))
+  #else
+    #define CACTUS_FFI_EXPORT
+    #define CACTUS_FFI_LOCAL
+  #endif
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct cactus_context_opaque* cactus_context_handle_t;
+
+
+typedef struct cactus_init_params_c {
+    const char* model_path;
+    const char* chat_template;
+
+    int32_t n_ctx;
+    int32_t n_batch;
+    int32_t n_ubatch;
+    int32_t n_gpu_layers;
+    int32_t n_threads;
+    bool use_mmap;
+    bool use_mlock;
+    bool embedding;
+    int32_t pooling_type;
+    int32_t embd_normalize;
+    bool flash_attn;
+    const char* cache_type_k;
+    const char* cache_type_v;
+    void (*progress_callback)(float progress);
+
+} cactus_init_params_c_t;
+
+typedef struct cactus_completion_params_c {
+    const char* prompt;
+    int32_t n_predict;
+    int32_t n_threads;
+    int32_t seed;
+    double temperature;
+    int32_t top_k;
+    double top_p;
+    double min_p;
+    double typical_p;
+    int32_t penalty_last_n;
+    double penalty_repeat;
+    double penalty_freq;
+    double penalty_present;
+    int32_t mirostat;
+    double mirostat_tau;
+    double mirostat_eta;
+    bool ignore_eos;
+    int32_t n_probs;
+    const char** stop_sequences;
+    int stop_sequence_count;
+    const char* grammar;
+    bool (*token_callback)(const char* token_json);
+
+} cactus_completion_params_c_t;
+
+
+typedef struct cactus_token_array_c {
+    int32_t* tokens;
+    int32_t count;
+} cactus_token_array_c_t;
+
+typedef struct cactus_float_array_c {
+    float* values;
+    int32_t count;
+} cactus_float_array_c_t;
+
+typedef struct cactus_completion_result_c {
+    char* text;
+    int32_t tokens_predicted;
+    int32_t tokens_evaluated;
+    bool truncated;
+    bool stopped_eos;
+    bool stopped_word;
+    bool stopped_limit;
+    char* stopping_word;
+} cactus_completion_result_c_t;
+
+typedef struct cactus_tokenize_result_c {
+    cactus_token_array_c_t tokens;
+    bool has_media;
+    char** bitmap_hashes;
+    int bitmap_hash_count;
+    size_t* chunk_positions;
+    int chunk_position_count;
+    size_t* chunk_positions_media;
+    int chunk_position_media_count;
+} cactus_tokenize_result_c_t;
+
+CACTUS_FFI_EXPORT cactus_context_handle_t cactus_init_context_c(const cactus_init_params_c_t* params);
+
+CACTUS_FFI_EXPORT void cactus_free_context_c(cactus_context_handle_t handle);
+
+CACTUS_FFI_EXPORT int cactus_completion_c(
+    cactus_context_handle_t handle,
+    const cactus_completion_params_c_t* params,
+    cactus_completion_result_c_t* result
+);
+
+CACTUS_FFI_EXPORT void cactus_stop_completion_c(cactus_context_handle_t handle);
+
+CACTUS_FFI_EXPORT cactus_token_array_c_t cactus_tokenize_c(cactus_context_handle_t handle, const char* text);
+
+CACTUS_FFI_EXPORT char* cactus_detokenize_c(cactus_context_handle_t handle, const int32_t* tokens, int32_t count);
+
+CACTUS_FFI_EXPORT cactus_float_array_c_t cactus_embedding_c(cactus_context_handle_t handle, const char* text);
+
+CACTUS_FFI_EXPORT void cactus_free_string_c(char* str);
+
+CACTUS_FFI_EXPORT void cactus_free_token_array_c(cactus_token_array_c_t arr);
+
+CACTUS_FFI_EXPORT void cactus_free_float_array_c(cactus_float_array_c_t arr);
+
+CACTUS_FFI_EXPORT void cactus_free_completion_result_members_c(cactus_completion_result_c_t* result);
+
+CACTUS_FFI_EXPORT cactus_tokenize_result_c_t cactus_tokenize_with_media_c(cactus_context_handle_t handle, const char* text, const char** media_paths, int media_count);
+
+CACTUS_FFI_EXPORT void cactus_free_tokenize_result_c(cactus_tokenize_result_c_t* result);
+
+CACTUS_FFI_EXPORT void cactus_set_guide_tokens_c(cactus_context_handle_t handle, const int32_t* tokens, int32_t count);
+
+CACTUS_FFI_EXPORT int cactus_init_multimodal_c(cactus_context_handle_t handle, const char* mmproj_path, bool use_gpu);
+
+CACTUS_FFI_EXPORT bool cactus_is_multimodal_enabled_c(cactus_context_handle_t handle);
+
+CACTUS_FFI_EXPORT bool cactus_supports_vision_c(cactus_context_handle_t handle);
+
+CACTUS_FFI_EXPORT bool cactus_supports_audio_c(cactus_context_handle_t handle);
+
+CACTUS_FFI_EXPORT void cactus_release_multimodal_c(cactus_context_handle_t handle);
+
+CACTUS_FFI_EXPORT int cactus_init_vocoder_c(cactus_context_handle_t handle, const char* vocoder_model_path);
+
+CACTUS_FFI_EXPORT bool cactus_is_vocoder_enabled_c(cactus_context_handle_t handle);
+
+CACTUS_FFI_EXPORT int cactus_get_tts_type_c(cactus_context_handle_t handle);
+
+CACTUS_FFI_EXPORT char* cactus_get_formatted_audio_completion_c(cactus_context_handle_t handle, const char* speaker_json_str, const char* text_to_speak);
+
+CACTUS_FFI_EXPORT cactus_token_array_c_t cactus_get_audio_guide_tokens_c(cactus_context_handle_t handle, const char* text_to_speak);
+
+CACTUS_FFI_EXPORT cactus_float_array_c_t cactus_decode_audio_tokens_c(cactus_context_handle_t handle, const int32_t* tokens, int32_t count);
+
+CACTUS_FFI_EXPORT void cactus_release_vocoder_c(cactus_context_handle_t handle);
+
+// **HIGH PRIORITY ADDITIONS**
+
+typedef struct {
+    const char* path;
+    float scale;
+} cactus_lora_adapter_c_t;
+
+typedef struct {
+    cactus_lora_adapter_c_t* adapters;
+    int32_t count;
+} cactus_lora_adapters_c_t;
+
+typedef struct {
+    char* model_name;
+    int64_t model_size;
+    int64_t model_params;
+    double pp_avg;
+    double pp_std;
+    double tg_avg;
+    double tg_std;
+} cactus_bench_result_c_t;
+
+// **HIGH PRIORITY: Benchmarking**
+CACTUS_FFI_EXPORT cactus_bench_result_c_t cactus_bench_c(cactus_context_handle_t handle, int pp, int tg, int pl, int nr);
+
+// **HIGH PRIORITY: LoRA Adapter Support**
+CACTUS_FFI_EXPORT int cactus_apply_lora_adapters_c(cactus_context_handle_t handle, const cactus_lora_adapters_c_t* adapters);
+CACTUS_FFI_EXPORT void cactus_remove_lora_adapters_c(cactus_context_handle_t handle);
+CACTUS_FFI_EXPORT cactus_lora_adapters_c_t cactus_get_loaded_lora_adapters_c(cactus_context_handle_t handle);
+
+// **HIGH PRIORITY: Chat Template Support**
+CACTUS_FFI_EXPORT bool cactus_validate_chat_template_c(cactus_context_handle_t handle, bool use_jinja, const char* name);
+CACTUS_FFI_EXPORT char* cactus_get_formatted_chat_c(cactus_context_handle_t handle, const char* messages, const char* chat_template);
+
+// **HIGH PRIORITY: Context Management**
+CACTUS_FFI_EXPORT void cactus_rewind_c(cactus_context_handle_t handle);
+CACTUS_FFI_EXPORT bool cactus_init_sampling_c(cactus_context_handle_t handle);
+
+// **HIGH PRIORITY: Model Information**
+CACTUS_FFI_EXPORT int32_t cactus_get_n_ctx_c(cactus_context_handle_t handle);
+CACTUS_FFI_EXPORT int32_t cactus_get_n_embd_c(cactus_context_handle_t handle);
+CACTUS_FFI_EXPORT char* cactus_get_model_desc_c(cactus_context_handle_t handle);
+CACTUS_FFI_EXPORT int64_t cactus_get_model_size_c(cactus_context_handle_t handle);
+CACTUS_FFI_EXPORT int64_t cactus_get_model_params_c(cactus_context_handle_t handle);
+
+// Memory management functions
+CACTUS_FFI_EXPORT void cactus_free_bench_result_members_c(cactus_bench_result_c_t* result);
+CACTUS_FFI_EXPORT void cactus_free_lora_adapters_c(cactus_lora_adapters_c_t* adapters);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
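The new cactus_ffi.h exposes the context through a plain C ABI: an opaque handle plus POD structs and matching free functions. Below is a hedged C++ sketch, not part of the package, of the call sequence the declarations suggest (init, completion, free); the model path and prompt are placeholders, and the assumption that cactus_completion_c returns 0 on success is illustrative rather than documented.

#include <cstdio>
#include "cactus_ffi.h"

int main() {
    cactus_init_params_c_t init = {};           // zero-init, then set only what we need
    init.model_path   = "/path/to/model.gguf";  // placeholder path
    init.n_ctx        = 2048;
    init.n_batch      = 512;
    init.n_gpu_layers = 99;
    init.n_threads    = 4;
    init.use_mmap     = true;

    cactus_context_handle_t ctx = cactus_init_context_c(&init);
    if (!ctx) return 1;

    cactus_completion_params_c_t comp = {};
    comp.prompt      = "Write a haiku about cacti.";
    comp.n_predict   = 64;
    comp.temperature = 0.7;
    comp.top_k       = 40;
    comp.top_p       = 0.95;

    cactus_completion_result_c_t result = {};
    if (cactus_completion_c(ctx, &comp, &result) == 0) {   // assumption: 0 means success
        std::printf("%s\n", result.text);
        cactus_free_completion_result_members_c(&result);  // frees text / stopping_word
    }

    cactus_free_context_c(ctx);
    return 0;
}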
cactus.framework/Headers/chat.h (+2 -0 in each xcframework slice)

@@ -3,6 +3,7 @@
 #pragma once
 
 #include "common.h"
+#include <chrono>
 #include <string>
 #include <vector>
 #include "minja/chat-template.hpp"
@@ -79,6 +80,7 @@ struct common_chat_templates_inputs {
     common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
     bool parallel_tool_calls = false;
     bool extract_reasoning = true;
+    std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
 };
 
 struct common_chat_params {
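The only chat.h change adds a `now` timestamp to common_chat_templates_inputs, so templates that render the current date take it from the inputs instead of the system clock. A small sketch (an assumption, not from the package) of pinning it for reproducible output, e.g. in tests:

#include <chrono>
#include "chat.h"

// Sketch: fix the new `now` field so date-dependent chat templates render
// deterministically. All other fields keep their defaults.
common_chat_templates_inputs make_pinned_inputs() {
    common_chat_templates_inputs inputs;
    inputs.now = std::chrono::system_clock::from_time_t(1700000000);  // fixed timestamp
    return inputs;
}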
cactus.framework/Headers/common.h (+42 -51 in each xcframework slice)

@@ -6,6 +6,7 @@
 
 #include <set>
 #include <string>
+#include <string_view>
 #include <vector>
 #include <sstream>
 
@@ -42,17 +43,6 @@ extern const char * LLAMA_BUILD_TARGET;
 
 struct common_control_vector_load_info;
 
-#define print_build_info() do { \
-    fprintf(stderr, "%s: build = %d (%s)\n", __func__, LLAMA_BUILD_NUMBER, LLAMA_COMMIT); \
-    fprintf(stderr, "%s: built with %s for %s\n", __func__, LLAMA_COMPILER, LLAMA_BUILD_TARGET); \
-} while(0)
-
-// build info
-extern int LLAMA_BUILD_NUMBER;
-extern char const *LLAMA_COMMIT;
-extern char const *LLAMA_COMPILER;
-extern char const *LLAMA_BUILD_TARGET;
-
 //
 // CPU utils
 //
@@ -77,7 +67,6 @@ enum llama_example {
     LLAMA_EXAMPLE_COMMON,
     LLAMA_EXAMPLE_SPECULATIVE,
     LLAMA_EXAMPLE_MAIN,
-    LLAMA_EXAMPLE_INFILL,
     LLAMA_EXAMPLE_EMBEDDING,
     LLAMA_EXAMPLE_PERPLEXITY,
     LLAMA_EXAMPLE_RETRIEVAL,
@@ -87,7 +76,7 @@ enum llama_example {
     LLAMA_EXAMPLE_SERVER,
     LLAMA_EXAMPLE_CVECTOR_GENERATOR,
     LLAMA_EXAMPLE_EXPORT_LORA,
-
+    LLAMA_EXAMPLE_MTMD,
     LLAMA_EXAMPLE_LOOKUP,
     LLAMA_EXAMPLE_PARALLEL,
     LLAMA_EXAMPLE_TTS,
@@ -107,6 +96,7 @@ enum common_sampler_type {
     COMMON_SAMPLER_TYPE_XTC = 8,
     COMMON_SAMPLER_TYPE_INFILL = 9,
     COMMON_SAMPLER_TYPE_PENALTIES = 10,
+    COMMON_SAMPLER_TYPE_TOP_N_SIGMA = 11,
 };
 
 // dimensionality reduction methods, used by cvector-generator
@@ -132,10 +122,6 @@ struct common_grammar_trigger {
     common_grammar_trigger_type type;
     std::string value;
     llama_token token = LLAMA_TOKEN_NULL;
-
-    // T can only be nlohmann::ordered_json
-    template <class T> T to_json() const;
-    template <class T> static common_grammar_trigger from_json(const T & in);
 };
 
 // sampling parameters
@@ -176,6 +162,7 @@ struct common_params_sampling {
     std::vector<enum common_sampler_type> samplers = {
         COMMON_SAMPLER_TYPE_PENALTIES,
         COMMON_SAMPLER_TYPE_DRY,
+        COMMON_SAMPLER_TYPE_TOP_N_SIGMA,
         COMMON_SAMPLER_TYPE_TOP_K,
         COMMON_SAMPLER_TYPE_TYPICAL_P,
         COMMON_SAMPLER_TYPE_TOP_P,
@@ -195,6 +182,13 @@ struct common_params_sampling {
     std::string print() const;
 };
 
+struct common_params_model {
+    std::string path    = ""; // model local path // NOLINT
+    std::string url     = ""; // model url to download // NOLINT
+    std::string hf_repo = ""; // HF repo // NOLINT
+    std::string hf_file = ""; // HF file // NOLINT
+};
+
 struct common_params_speculative {
     std::vector<lm_ggml_backend_dev_t> devices; // devices to use for offloading
 
@@ -208,19 +202,11 @@ struct common_params_speculative {
     struct cpu_params cpuparams;
     struct cpu_params cpuparams_batch;
 
-
-    std::string hf_file = ""; // HF file // NOLINT
-
-    std::string model = ""; // draft model for speculative decoding // NOLINT
-    std::string model_url = ""; // model url to download // NOLINT
+    struct common_params_model model;
 };
 
 struct common_params_vocoder {
-
-    std::string hf_file = ""; // HF file // NOLINT
-
-    std::string model = ""; // model path // NOLINT
-    std::string model_url = ""; // model url to download // NOLINT
+    struct common_params_model model;
 
     std::string speaker_file = ""; // speaker file path // NOLINT
 
@@ -279,12 +265,10 @@ struct common_params {
     struct common_params_speculative speculative;
     struct common_params_vocoder vocoder;
 
-
+    struct common_params_model model;
+
     std::string model_alias = ""; // model alias // NOLINT
-    std::string model_url = ""; // model url to download // NOLINT
     std::string hf_token = ""; // HF token // NOLINT
-    std::string hf_repo = ""; // HF repo // NOLINT
-    std::string hf_file = ""; // HF file // NOLINT
     std::string prompt = ""; // NOLINT
     std::string system_prompt = ""; // NOLINT
     std::string prompt_file = ""; // store the external prompt file name // NOLINT
@@ -298,6 +282,7 @@ struct common_params {
     std::vector<std::string> in_files; // all input files
     std::vector<std::string> antiprompt; // strings upon which more user input is prompted (a.k.a. reverse prompts)
     std::vector<llama_model_kv_override> kv_overrides;
+    std::vector<llama_model_tensor_buft_override> tensor_buft_overrides;
 
     bool lora_init_without_apply = false; // only load lora to memory, but do not apply it to ctx (user can manually apply lora later using llama_adapter_lora_apply)
     std::vector<common_adapter_lora_info> lora_adapters; // lora adapter path with user defined scale
@@ -339,17 +324,17 @@ struct common_params {
     bool flash_attn = false; // flash attention
     bool no_perf = false; // disable performance metrics
     bool ctx_shift = true; // context shift on inifinite text generation
+    bool swa_full = false; // use full-size SWA cache (https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)
 
     bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix
-    bool logits_all = false; // return logits for all tokens in the batch
     bool use_mmap = true; // use mmap for faster loads
     bool use_mlock = false; // use mlock to keep model in memory
     bool verbose_prompt = false; // print prompt tokens before generation
     bool display_prompt = true; // print prompt before generation
-    bool dump_kv_cache = false; // dump the KV cache contents for debugging purposes
     bool no_kv_offload = false; // disable KV offloading
     bool warmup = true; // warmup run
     bool check_tensors = false; // validate tensor data
+    bool no_op_offload = false; // globally disable offload host tensor operations to device
 
     bool single_turn = false; // single turn chat conversation
 
@@ -361,8 +346,10 @@ struct common_params {
 
     common_conversation_mode conversation_mode = COMMON_CONVERSATION_MODE_AUTO;
 
-    // multimodal models (see
-
+    // multimodal models (see tools/mtmd)
+    struct common_params_model mmproj;
+    bool mmproj_use_gpu = true; // use GPU for multimodal model
+    bool no_mmproj = false; // explicitly disable multimodal model
     std::vector<std::string> image; // path to image file(s)
 
     // embedding
@@ -385,6 +372,7 @@ struct common_params {
     bool use_jinja = false; // NOLINT
     bool enable_chat_template = true;
     common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
+    bool prefill_assistant = true; // if true, any trailing assistant message will be prefilled into the response
 
     std::vector<std::string> api_keys;
 
@@ -428,13 +416,14 @@ struct common_params {
 
     bool process_output = false; // collect data for the output tensor
     bool compute_ppl = true; // whether to compute perplexity
+    bool parse_special = false; // whether to parse special tokens during imatrix tokenization
 
     // cvector-generator params
     int n_pca_batch = 100;
    int n_pca_iterations = 1000;
     dimre_method cvector_dimre_method = DIMRE_METHOD_PCA;
-    std::string cvector_positive_file = "
-    std::string cvector_negative_file = "
+    std::string cvector_positive_file = "tools/cvector-generator/positive.txt";
+    std::string cvector_negative_file = "tools/cvector-generator/negative.txt";
 
     bool spm_infill = false; // suffix/prefix/middle pattern for infill
 
@@ -443,6 +432,11 @@ struct common_params {
 
     // common params
     std::string out_file; // output filename for all example programs
+    // optional callback for model loading progress and cancellation:
+    // called with a progress value between 0.0 and 1.0.
+    // return false from callback to abort model loading or true to continue
+    llama_progress_callback load_progress_callback = NULL;
+    void * load_progress_callback_user_data = NULL;
 };
 
 // call once at the start of a program if it uses libcommon
@@ -520,10 +514,9 @@ static bool string_starts_with(const std::string & str,
     return str.rfind(prefix, 0) == 0;
 }
 
-
-
-
-}
+// While we wait for C++20's std::string::ends_with...
+bool string_ends_with(const std::string_view & str, const std::string_view & suffix);
+size_t string_find_partial_stop(const std::string_view & str, const std::string_view & stop);
 
 bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides);
 void string_process_escapes(std::string & input);
@@ -564,6 +557,8 @@ struct lm_ggml_threadpool_params lm_ggml_threadpool_params_from_cpu_params(const
 // clear LoRA adapters from context, then apply new list of adapters
 void common_set_adapter_lora(struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora);
 
+std::string get_model_endpoint();
+
 //
 // Batch utils
 //
@@ -630,16 +625,6 @@ std::string common_detokenize(
     const std::vector<llama_token> & tokens,
     bool special = true);
 
-//
-// KV cache utils
-//
-
-// Dump the KV cache view with the number of sequences per cell.
-void common_kv_cache_dump_view(const llama_kv_cache_view & view, int row_size = 80);
-
-// Dump the KV cache view showing individual sequences in each cell (long output).
-void common_kv_cache_dump_view_seqs(const llama_kv_cache_view & view, int row_size = 40);
-
 //
 // Embedding utils
 //
@@ -681,3 +666,9 @@ const char * const LLM_KV_SPLIT_COUNT = "split.count";
 const char * const LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count";
 
 }
+
+//
+// training utils
+//
+
+lm_ggml_opt_dataset_t common_opt_dataset_init(struct llama_context * ctx, const std::vector<llama_token> & tokens, int64_t stride);
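The largest common.h change is structural: the scattered model / model_url / hf_repo / hf_file strings are folded into a reusable common_params_model, which common_params, common_params_speculative and common_params_vocoder now embed, and an optional load-progress callback is added. A hedged C++ sketch of what callers set against the new layout (field values are placeholders, not defaults from the package):

#include "common.h"

// Sketch: configure the regrouped model fields and the new load-progress callback.
common_params make_params() {
    common_params params;

    params.model.path    = "/path/to/model.gguf";       // local path, now under common_params_model
    params.model.hf_repo = "some-org/some-model-GGUF";  // replaces the removed top-level hf_repo
    params.model.hf_file = "model-q4_0.gguf";           // replaces the removed top-level hf_file

    // Optional: observe model loading; per the header comment, return false to abort.
    params.load_progress_callback = [](float progress, void * /*user_data*/) -> bool {
        return progress <= 1.0f;  // keep loading
    };
    return params;
}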