cui-llama.rn 1.4.4 → 1.5.0
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.
- package/android/src/main/CMakeLists.txt +9 -2
- package/android/src/main/jni.cpp +54 -34
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/cpp/binary-ops.cpp +158 -0
- package/cpp/binary-ops.h +16 -0
- package/cpp/chat.cpp +1769 -1085
- package/cpp/chat.h +143 -0
- package/cpp/common.cpp +1562 -1996
- package/cpp/common.h +677 -744
- package/cpp/cpu-common.h +72 -0
- package/cpp/ggml-alloc.c +1039 -1030
- package/cpp/ggml-alloc.h +1 -1
- package/cpp/ggml-backend-impl.h +255 -255
- package/cpp/ggml-backend-reg.cpp +586 -582
- package/cpp/ggml-backend.cpp +2004 -2002
- package/cpp/ggml-backend.h +354 -354
- package/cpp/ggml-common.h +1857 -1851
- package/cpp/ggml-cpp.h +39 -39
- package/cpp/ggml-cpu-aarch64.cpp +5725 -4247
- package/cpp/ggml-cpu-aarch64.h +8 -8
- package/cpp/ggml-cpu-impl.h +512 -380
- package/cpp/ggml-cpu-quants.c +13026 -11517
- package/cpp/ggml-cpu-traits.cpp +36 -36
- package/cpp/ggml-cpu-traits.h +38 -38
- package/cpp/ggml-cpu.c +3438 -14485
- package/cpp/ggml-cpu.cpp +655 -633
- package/cpp/ggml-cpu.h +138 -135
- package/cpp/ggml-impl.h +594 -567
- package/cpp/ggml-metal-impl.h +312 -3
- package/cpp/ggml-metal.h +66 -66
- package/cpp/ggml-metal.m +5360 -5002
- package/cpp/ggml-opt.cpp +854 -854
- package/cpp/ggml-opt.h +216 -216
- package/cpp/ggml-quants.c +5238 -5238
- package/cpp/ggml-threading.h +14 -14
- package/cpp/ggml.c +6618 -6524
- package/cpp/ggml.h +2222 -2194
- package/cpp/gguf.cpp +1330 -1329
- package/cpp/gguf.h +202 -202
- package/cpp/json-schema-to-grammar.cpp +1024 -1025
- package/cpp/json-schema-to-grammar.h +21 -22
- package/cpp/json.hpp +24766 -24766
- package/cpp/llama-adapter.cpp +382 -347
- package/cpp/llama-adapter.h +76 -74
- package/cpp/llama-arch.cpp +1714 -1492
- package/cpp/llama-arch.h +428 -402
- package/cpp/llama-batch.cpp +368 -368
- package/cpp/llama-batch.h +88 -88
- package/cpp/llama-chat.cpp +640 -587
- package/cpp/llama-chat.h +56 -53
- package/cpp/llama-context.cpp +2831 -1775
- package/cpp/llama-context.h +265 -128
- package/cpp/llama-cparams.cpp +1 -1
- package/cpp/llama-cparams.h +38 -37
- package/cpp/llama-cpp.h +30 -30
- package/cpp/llama-grammar.cpp +1219 -1219
- package/cpp/llama-grammar.h +173 -164
- package/cpp/llama-graph.cpp +1695 -0
- package/cpp/llama-graph.h +592 -0
- package/cpp/llama-hparams.cpp +79 -71
- package/cpp/llama-hparams.h +156 -139
- package/cpp/llama-impl.cpp +167 -167
- package/cpp/llama-impl.h +61 -61
- package/cpp/llama-io.cpp +15 -0
- package/cpp/llama-io.h +35 -0
- package/cpp/llama-kv-cache.cpp +1380 -718
- package/cpp/llama-kv-cache.h +213 -218
- package/cpp/llama-memory.cpp +1 -0
- package/cpp/llama-memory.h +21 -0
- package/cpp/llama-mmap.cpp +600 -590
- package/cpp/llama-mmap.h +68 -68
- package/cpp/llama-model-loader.cpp +1129 -1124
- package/cpp/llama-model-loader.h +169 -167
- package/cpp/llama-model.cpp +13080 -4023
- package/cpp/llama-model.h +409 -370
- package/cpp/llama-sampling.cpp +2563 -2525
- package/cpp/llama-sampling.h +32 -32
- package/cpp/llama-vocab.cpp +3295 -3252
- package/cpp/llama-vocab.h +125 -125
- package/cpp/llama.cpp +351 -10137
- package/cpp/llama.h +1434 -1340
- package/cpp/log.cpp +427 -423
- package/cpp/log.h +132 -132
- package/cpp/{chat-template.hpp → minja/chat-template.hpp} +537 -529
- package/cpp/{minja.hpp → minja/minja.hpp} +2941 -2883
- package/cpp/ops.cpp +8723 -0
- package/cpp/ops.h +128 -0
- package/cpp/rn-llama.cpp +45 -71
- package/cpp/rn-llama.h +3 -3
- package/cpp/sampling.cpp +573 -532
- package/cpp/sgemm.cpp +3043 -2598
- package/cpp/sgemm.h +14 -14
- package/cpp/simd-mappings.h +888 -0
- package/cpp/speculative.cpp +278 -277
- package/cpp/speculative.h +28 -28
- package/cpp/unary-ops.cpp +186 -0
- package/cpp/unary-ops.h +28 -0
- package/cpp/vec.cpp +258 -0
- package/cpp/vec.h +802 -0
- package/ios/CMakeLists.txt +5 -2
- package/ios/RNLlama.mm +2 -2
- package/ios/RNLlamaContext.mm +40 -24
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +6 -4
- package/src/index.ts +3 -1
- package/android/src/main/build-arm64/CMakeCache.txt +0 -429
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCCompiler.cmake +0 -81
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCXXCompiler.cmake +0 -101
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_C.bin +0 -0
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_CXX.bin +0 -0
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeSystem.cmake +0 -15
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.c +0 -904
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.cpp +0 -919
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/CMakeConfigureLog.yaml +0 -431
- package/android/src/main/build-arm64/CMakeFiles/CMakeDirectoryInformation.cmake +0 -16
- package/android/src/main/build-arm64/CMakeFiles/Makefile.cmake +0 -165
- package/android/src/main/build-arm64/CMakeFiles/Makefile2 +0 -297
- package/android/src/main/build-arm64/CMakeFiles/Progress/1 +0 -1
- package/android/src/main/build-arm64/CMakeFiles/Progress/2 +0 -1
- package/android/src/main/build-arm64/CMakeFiles/Progress/3 +0 -1
- package/android/src/main/build-arm64/CMakeFiles/Progress/4 +0 -1
- package/android/src/main/build-arm64/CMakeFiles/Progress/5 +0 -1
- package/android/src/main/build-arm64/CMakeFiles/Progress/6 +0 -1
- package/android/src/main/build-arm64/CMakeFiles/Progress/count.txt +0 -1
- package/android/src/main/build-arm64/CMakeFiles/TargetDirectories.txt +0 -8
- package/android/src/main/build-arm64/CMakeFiles/cmake.check_cache +0 -1
- package/android/src/main/build-arm64/CMakeFiles/progress.marks +0 -1
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o.d +0 -58
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o.d +0 -756
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o.d +0 -709
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o.d +0 -714
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o.d +0 -62
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o.d +0 -708
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o.d +0 -113
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o.d +0 -713
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o.d +0 -763
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o.d +0 -61
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o.d +0 -707
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o.d +0 -104
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o.d +0 -714
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o.d +0 -723
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/DependInfo.cmake +0 -62
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/build.make +0 -722
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/cmake_clean.cmake +0 -89
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.make +0 -2
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.ts +0 -2
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/depend.make +0 -2
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/flags.make +0 -17
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/progress.make +0 -41
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/DependInfo.cmake +0 -62
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/build.make +0 -722
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/cmake_clean.cmake +0 -89
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.make +0 -2
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.ts +0 -2
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/depend.make +0 -2
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/flags.make +0 -17
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/progress.make +0 -41
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/DependInfo.cmake +0 -62
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/build.make +0 -722
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/cmake_clean.cmake +0 -89
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.make +0 -2
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.ts +0 -2
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/depend.make +0 -2
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/flags.make +0 -17
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/progress.make +0 -41
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/DependInfo.cmake +0 -62
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/build.make +0 -722
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/cmake_clean.cmake +0 -89
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.make +0 -2
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.ts +0 -2
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/depend.make +0 -2
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/flags.make +0 -17
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/progress.make +0 -41
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/DependInfo.cmake +0 -62
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/build.make +0 -722
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/cmake_clean.cmake +0 -89
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.make +0 -2
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.ts +0 -2
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/depend.make +0 -2
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/flags.make +0 -17
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/progress.make +0 -41
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/DependInfo.cmake +0 -62
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/build.make +0 -722
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/cmake_clean.cmake +0 -89
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.make +0 -2
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.ts +0 -2
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/depend.make +0 -2
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/flags.make +0 -17
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/progress.make +0 -41
- package/android/src/main/build-arm64/Makefile +0 -1862
- package/android/src/main/build-arm64/cmake_install.cmake +0 -66
- package/cpp/chat.hpp +0 -55
- package/cpp/rn-llama.hpp +0 -913
package/cpp/llama-impl.cpp
CHANGED
@@ -1,167 +1,167 @@
(every one of the 167 lines is removed and re-added with identical text, so the file content is shown once, without -/+ markers)

#include "llama-impl.h"

#include "gguf.h"
#include "llama.h"

#include <cinttypes>
#include <climits>
#include <cstdarg>
#include <cstring>
#include <vector>
#include <sstream>

struct llama_logger_state {
    lm_ggml_log_callback log_callback = llama_log_callback_default;
    void * log_callback_user_data = nullptr;
};

static llama_logger_state g_logger_state;

time_meas::time_meas(int64_t & t_acc, bool disable) : t_start_us(disable ? -1 : lm_ggml_time_us()), t_acc(t_acc) {}

time_meas::~time_meas() {
    if (t_start_us >= 0) {
        t_acc += lm_ggml_time_us() - t_start_us;
    }
}

void llama_log_set(lm_ggml_log_callback log_callback, void * user_data) {
    lm_ggml_log_set(log_callback, user_data);
    g_logger_state.log_callback = log_callback ? log_callback : llama_log_callback_default;
    g_logger_state.log_callback_user_data = user_data;
}

static void llama_log_internal_v(lm_ggml_log_level level, const char * format, va_list args) {
    va_list args_copy;
    va_copy(args_copy, args);
    char buffer[128];
    int len = vsnprintf(buffer, 128, format, args);
    if (len < 128) {
        g_logger_state.log_callback(level, buffer, g_logger_state.log_callback_user_data);
    } else {
        char * buffer2 = new char[len + 1];
        vsnprintf(buffer2, len + 1, format, args_copy);
        buffer2[len] = 0;
        g_logger_state.log_callback(level, buffer2, g_logger_state.log_callback_user_data);
        delete[] buffer2;
    }
    va_end(args_copy);
}

void llama_log_internal(lm_ggml_log_level level, const char * format, ...) {
    va_list args;
    va_start(args, format);
    llama_log_internal_v(level, format, args);
    va_end(args);
}

void llama_log_callback_default(lm_ggml_log_level level, const char * text, void * user_data) {
    (void) level;
    (void) user_data;
    fputs(text, stderr);
    fflush(stderr);
}

void replace_all(std::string & s, const std::string & search, const std::string & replace) {
    if (search.empty()) {
        return;
    }
    std::string builder;
    builder.reserve(s.length());
    size_t pos = 0;
    size_t last_pos = 0;
    while ((pos = s.find(search, last_pos)) != std::string::npos) {
        builder.append(s, last_pos, pos - last_pos);
        builder.append(replace);
        last_pos = pos + search.length();
    }
    builder.append(s, last_pos, std::string::npos);
    s = std::move(builder);
}

std::string format(const char * fmt, ...) {
    va_list ap;
    va_list ap2;
    va_start(ap, fmt);
    va_copy(ap2, ap);
    int size = vsnprintf(NULL, 0, fmt, ap);
    LM_GGML_ASSERT(size >= 0 && size < INT_MAX); // NOLINT
    std::vector<char> buf(size + 1);
    int size2 = vsnprintf(buf.data(), size + 1, fmt, ap2);
    LM_GGML_ASSERT(size2 == size);
    va_end(ap2);
    va_end(ap);
    return std::string(buf.data(), size);
}

std::string llama_format_tensor_shape(const std::vector<int64_t> & ne) {
    char buf[256];
    snprintf(buf, sizeof(buf), "%5" PRId64, ne.at(0));
    for (size_t i = 1; i < ne.size(); i++) {
        snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), ", %5" PRId64, ne.at(i));
    }
    return buf;
}

std::string llama_format_tensor_shape(const struct lm_ggml_tensor * t) {
    char buf[256];
    snprintf(buf, sizeof(buf), "%5" PRId64, t->ne[0]);
    for (int i = 1; i < LM_GGML_MAX_DIMS; i++) {
        snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), ", %5" PRId64, t->ne[i]);
    }
    return buf;
}

static std::string lm_gguf_data_to_str(enum lm_gguf_type type, const void * data, int i) {
    switch (type) {
        case LM_GGUF_TYPE_UINT8:   return std::to_string(((const uint8_t  *)data)[i]);
        case LM_GGUF_TYPE_INT8:    return std::to_string(((const int8_t   *)data)[i]);
        case LM_GGUF_TYPE_UINT16:  return std::to_string(((const uint16_t *)data)[i]);
        case LM_GGUF_TYPE_INT16:   return std::to_string(((const int16_t  *)data)[i]);
        case LM_GGUF_TYPE_UINT32:  return std::to_string(((const uint32_t *)data)[i]);
        case LM_GGUF_TYPE_INT32:   return std::to_string(((const int32_t  *)data)[i]);
        case LM_GGUF_TYPE_UINT64:  return std::to_string(((const uint64_t *)data)[i]);
        case LM_GGUF_TYPE_INT64:   return std::to_string(((const int64_t  *)data)[i]);
        case LM_GGUF_TYPE_FLOAT32: return std::to_string(((const float    *)data)[i]);
        case LM_GGUF_TYPE_FLOAT64: return std::to_string(((const double   *)data)[i]);
        case LM_GGUF_TYPE_BOOL:    return ((const bool *)data)[i] ? "true" : "false";
        default:                   return format("unknown type %d", type);
    }
}

std::string lm_gguf_kv_to_str(const struct lm_gguf_context * ctx_gguf, int i) {
    const enum lm_gguf_type type = lm_gguf_get_kv_type(ctx_gguf, i);

    switch (type) {
        case LM_GGUF_TYPE_STRING:
            return lm_gguf_get_val_str(ctx_gguf, i);
        case LM_GGUF_TYPE_ARRAY:
            {
                const enum lm_gguf_type arr_type = lm_gguf_get_arr_type(ctx_gguf, i);
                int arr_n = lm_gguf_get_arr_n(ctx_gguf, i);
                const void * data = arr_type == LM_GGUF_TYPE_STRING ? nullptr : lm_gguf_get_arr_data(ctx_gguf, i);
                std::stringstream ss;
                ss << "[";
                for (int j = 0; j < arr_n; j++) {
                    if (arr_type == LM_GGUF_TYPE_STRING) {
                        std::string val = lm_gguf_get_arr_str(ctx_gguf, i, j);
                        // escape quotes
                        replace_all(val, "\\", "\\\\");
                        replace_all(val, "\"", "\\\"");
                        ss << '"' << val << '"';
                    } else if (arr_type == LM_GGUF_TYPE_ARRAY) {
                        ss << "???";
                    } else {
                        ss << lm_gguf_data_to_str(arr_type, data, j);
                    }
                    if (j < arr_n - 1) {
                        ss << ", ";
                    }
                }
                ss << "]";
                return ss.str();
            }
        default:
            return lm_gguf_data_to_str(type, lm_gguf_get_val_data(ctx_gguf, i), 0);
    }
}
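For context, a minimal usage sketch (editorial, not part of the package diff): the llama_log_set() defined above installs a single callback for both the ggml-level and llama-level log paths, falling back to the stderr default when passed nullptr. It is declared in the bundled llama.h; the sink and installer names below are hypothetical.

#include "llama.h"

#include <cstdio>

// hypothetical sink: prefix the numeric level and write to stdout instead of stderr
static void my_log_sink(lm_ggml_log_level level, const char * text, void * user_data) {
    (void) user_data;
    fprintf(stdout, "[%d] %s", (int) level, text);
}

void install_logging() {
    // forwards to lm_ggml_log_set() and updates g_logger_state (see above),
    // so both ggml and llama messages reach my_log_sink
    llama_log_set(my_log_sink, /*user_data=*/nullptr);
}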
package/cpp/llama-impl.h
CHANGED
@@ -1,61 +1,61 @@
(every one of the 61 lines is removed and re-added with identical text, so the file content is shown once, without -/+ markers)

#pragma once

#include "ggml.h" // for lm_ggml_log_level

#include <string>
#include <vector>

#ifdef __GNUC__
#    if defined(__MINGW32__) && !defined(__clang__)
#        define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
#    else
#        define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
#    endif
#else
#    define LLAMA_ATTRIBUTE_FORMAT(...)
#endif

//
// logging
//

LLAMA_ATTRIBUTE_FORMAT(2, 3)
void llama_log_internal        (lm_ggml_log_level level, const char * format, ...);
void llama_log_callback_default(lm_ggml_log_level level, const char * text, void * user_data);

#define LLAMA_LOG(...)       llama_log_internal(LM_GGML_LOG_LEVEL_NONE , __VA_ARGS__)
#define LLAMA_LOG_INFO(...)  llama_log_internal(LM_GGML_LOG_LEVEL_INFO , __VA_ARGS__)
#define LLAMA_LOG_WARN(...)  llama_log_internal(LM_GGML_LOG_LEVEL_WARN , __VA_ARGS__)
#define LLAMA_LOG_ERROR(...) llama_log_internal(LM_GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
#define LLAMA_LOG_DEBUG(...) llama_log_internal(LM_GGML_LOG_LEVEL_DEBUG, __VA_ARGS__)
#define LLAMA_LOG_CONT(...)  llama_log_internal(LM_GGML_LOG_LEVEL_CONT , __VA_ARGS__)

//
// helpers
//

template <typename T>
struct no_init {
    T value;
    no_init() { /* do nothing */ }
};

struct time_meas {
    time_meas(int64_t & t_acc, bool disable = false);
    ~time_meas();

    const int64_t t_start_us;

    int64_t & t_acc;
};

void replace_all(std::string & s, const std::string & search, const std::string & replace);

// TODO: rename to llama_format ?
LLAMA_ATTRIBUTE_FORMAT(1, 2)
std::string format(const char * fmt, ...);

std::string llama_format_tensor_shape(const std::vector<int64_t> & ne);
std::string llama_format_tensor_shape(const struct lm_ggml_tensor * t);

std::string lm_gguf_kv_to_str(const struct lm_gguf_context * ctx_gguf, int i);
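For context, a minimal sketch (not from the diff) of the time_meas helper declared above: it is an RAII timer that, on destruction, adds the elapsed microseconds to a caller-owned accumulator, and becomes a no-op when constructed with disable = true. The function and variable names below are hypothetical.

#include "llama-impl.h"

static int64_t t_decode_us = 0; // hypothetical accumulator

void do_decode_step() {
    time_meas tm(t_decode_us); // records lm_ggml_time_us() on entry
    // ... work to be timed ...
}   // destructor adds the elapsed time to t_decode_us

void do_decode_step_untimed() {
    time_meas tm(t_decode_us, /*disable=*/true); // t_start_us == -1, nothing accumulated
    // ... same work, not timed ...
}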
package/cpp/llama-io.cpp
ADDED
@@ -0,0 +1,15 @@
+#include "llama-io.h"
+
+void llama_io_write_i::write_string(const std::string & str) {
+    uint32_t str_size = str.size();
+
+    write(&str_size, sizeof(str_size));
+    write(str.data(), str_size);
+}
+
+void llama_io_read_i::read_string(std::string & str) {
+    uint32_t str_size;
+    read_to(&str_size, sizeof(str_size));
+
+    str.assign((const char *) read(str_size), str_size);
+}
package/cpp/llama-io.h
ADDED
@@ -0,0 +1,35 @@
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <string>
+
+struct lm_ggml_tensor;
+
+class llama_io_write_i {
+public:
+    llama_io_write_i() = default;
+    virtual ~llama_io_write_i() = default;
+
+    virtual void write(const void * src, size_t size) = 0;
+    virtual void write_tensor(const lm_ggml_tensor * tensor, size_t offset, size_t size) = 0;
+
+    // bytes written so far
+    virtual size_t n_bytes() = 0;
+
+    void write_string(const std::string & str);
+};
+
+class llama_io_read_i {
+public:
+    llama_io_read_i() = default;
+    virtual ~llama_io_read_i() = default;
+
+    virtual const uint8_t * read(size_t size) = 0;
+    virtual void read_to(void * dst, size_t size) = 0;
+
+    // bytes read so far
+    virtual size_t n_bytes() = 0;
+
+    void read_string(std::string & str);
+};
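The two classes above are pure interfaces; the package's concrete implementations are not part of this diff. Below is a minimal in-memory sketch (editorial, with hypothetical class names) showing how write_string()/read_string() round-trip a string through the 4-byte length-prefixed layout defined in llama-io.cpp. The write_tensor() override assumes a CPU-resident tensor.

#include "llama-io.h"
#include "ggml.h" // for lm_ggml_tensor and LM_GGML_ASSERT

#include <cstring>
#include <vector>

class buffer_writer : public llama_io_write_i {
public:
    void write(const void * src, size_t size) override {
        const uint8_t * p = (const uint8_t *) src;
        buf.insert(buf.end(), p, p + size);
    }
    void write_tensor(const lm_ggml_tensor * tensor, size_t offset, size_t size) override {
        // assumption: tensor data lives in host memory (no backend transfer)
        write((const char *) tensor->data + offset, size);
    }
    size_t n_bytes() override { return buf.size(); }

    std::vector<uint8_t> buf;
};

class buffer_reader : public llama_io_read_i {
public:
    buffer_reader(const uint8_t * data, size_t size) : data(data), size(size) {}

    const uint8_t * read(size_t n) override {
        LM_GGML_ASSERT(pos + n <= size); // stay inside the buffer
        const uint8_t * p = data + pos;
        pos += n;
        return p;
    }
    void read_to(void * dst, size_t n) override { memcpy(dst, read(n), n); }
    size_t n_bytes() override { return pos; }

private:
    const uint8_t * data;
    size_t size;
    size_t pos = 0;
};

// usage: a string is serialized as a 4-byte length followed by the raw bytes
void round_trip() {
    buffer_writer w;
    w.write_string("hello"); // w.buf now holds 4 + 5 bytes

    buffer_reader r(w.buf.data(), w.buf.size());
    std::string s;
    r.read_string(s); // s == "hello"
}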