@fugood/llama.node 0.0.1-alpha.1

This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions exactly as they appear in their respective public registries.
Files changed (204)
  1. package/CMakeLists.txt +85 -0
  2. package/README.md +56 -0
  3. package/bin/darwin/arm64/llama-node.node +0 -0
  4. package/bin/darwin/x64/llama-node.node +0 -0
  5. package/bin/linux/arm64/llama-node.node +0 -0
  6. package/bin/linux/x64/llama-node.node +0 -0
  7. package/bin/win32/arm64/llama-node.node +0 -0
  8. package/bin/win32/arm64/node.lib +0 -0
  9. package/bin/win32/x64/llama-node.node +0 -0
  10. package/bin/win32/x64/node.lib +0 -0
  11. package/lib/binding.js +13 -0
  12. package/lib/binding.ts +57 -0
  13. package/lib/index.js +24 -0
  14. package/lib/index.ts +13 -0
  15. package/package.json +65 -0
  16. package/src/addons.cpp +506 -0
  17. package/src/llama.cpp/CMakeLists.txt +1320 -0
  18. package/src/llama.cpp/build.zig +172 -0
  19. package/src/llama.cpp/cmake/FindSIMD.cmake +100 -0
  20. package/src/llama.cpp/common/CMakeLists.txt +87 -0
  21. package/src/llama.cpp/common/base64.hpp +392 -0
  22. package/src/llama.cpp/common/common.cpp +2949 -0
  23. package/src/llama.cpp/common/common.h +324 -0
  24. package/src/llama.cpp/common/console.cpp +501 -0
  25. package/src/llama.cpp/common/console.h +19 -0
  26. package/src/llama.cpp/common/grammar-parser.cpp +440 -0
  27. package/src/llama.cpp/common/grammar-parser.h +29 -0
  28. package/src/llama.cpp/common/json-schema-to-grammar.cpp +764 -0
  29. package/src/llama.cpp/common/json-schema-to-grammar.h +4 -0
  30. package/src/llama.cpp/common/json.hpp +24766 -0
  31. package/src/llama.cpp/common/log.h +724 -0
  32. package/src/llama.cpp/common/ngram-cache.cpp +282 -0
  33. package/src/llama.cpp/common/ngram-cache.h +94 -0
  34. package/src/llama.cpp/common/sampling.cpp +353 -0
  35. package/src/llama.cpp/common/sampling.h +147 -0
  36. package/src/llama.cpp/common/stb_image.h +8396 -0
  37. package/src/llama.cpp/common/train.cpp +1513 -0
  38. package/src/llama.cpp/common/train.h +233 -0
  39. package/src/llama.cpp/examples/CMakeLists.txt +52 -0
  40. package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +5 -0
  41. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +1640 -0
  42. package/src/llama.cpp/examples/batched/CMakeLists.txt +5 -0
  43. package/src/llama.cpp/examples/batched/batched.cpp +262 -0
  44. package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +5 -0
  45. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +261 -0
  46. package/src/llama.cpp/examples/beam-search/CMakeLists.txt +5 -0
  47. package/src/llama.cpp/examples/beam-search/beam-search.cpp +188 -0
  48. package/src/llama.cpp/examples/benchmark/CMakeLists.txt +6 -0
  49. package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +275 -0
  50. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +5 -0
  51. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +936 -0
  52. package/src/llama.cpp/examples/embedding/CMakeLists.txt +5 -0
  53. package/src/llama.cpp/examples/embedding/embedding.cpp +211 -0
  54. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +9 -0
  55. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +195 -0
  56. package/src/llama.cpp/examples/export-lora/CMakeLists.txt +5 -0
  57. package/src/llama.cpp/examples/export-lora/export-lora.cpp +462 -0
  58. package/src/llama.cpp/examples/finetune/CMakeLists.txt +5 -0
  59. package/src/llama.cpp/examples/finetune/finetune.cpp +1861 -0
  60. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +5 -0
  61. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +132 -0
  62. package/src/llama.cpp/examples/gguf/CMakeLists.txt +5 -0
  63. package/src/llama.cpp/examples/gguf/gguf.cpp +256 -0
  64. package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +5 -0
  65. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +553 -0
  66. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +5 -0
  67. package/src/llama.cpp/examples/gritlm/gritlm.cpp +215 -0
  68. package/src/llama.cpp/examples/imatrix/CMakeLists.txt +5 -0
  69. package/src/llama.cpp/examples/imatrix/imatrix.cpp +655 -0
  70. package/src/llama.cpp/examples/infill/CMakeLists.txt +5 -0
  71. package/src/llama.cpp/examples/infill/infill.cpp +767 -0
  72. package/src/llama.cpp/examples/jeopardy/questions.txt +100 -0
  73. package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +5 -0
  74. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +1286 -0
  75. package/src/llama.cpp/examples/llama.android/app/src/main/cpp/CMakeLists.txt +50 -0
  76. package/src/llama.cpp/examples/llama.android/app/src/main/cpp/llama-android.cpp +443 -0
  77. package/src/llama.cpp/examples/llava/CMakeLists.txt +37 -0
  78. package/src/llama.cpp/examples/llava/clip.cpp +2027 -0
  79. package/src/llama.cpp/examples/llava/clip.h +85 -0
  80. package/src/llama.cpp/examples/llava/llava-cli.cpp +309 -0
  81. package/src/llama.cpp/examples/llava/llava.cpp +426 -0
  82. package/src/llama.cpp/examples/llava/llava.h +50 -0
  83. package/src/llama.cpp/examples/llava/requirements.txt +3 -0
  84. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +5 -0
  85. package/src/llama.cpp/examples/lookahead/lookahead.cpp +485 -0
  86. package/src/llama.cpp/examples/lookup/CMakeLists.txt +23 -0
  87. package/src/llama.cpp/examples/lookup/lookup-create.cpp +41 -0
  88. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +47 -0
  89. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +160 -0
  90. package/src/llama.cpp/examples/lookup/lookup.cpp +258 -0
  91. package/src/llama.cpp/examples/main/CMakeLists.txt +5 -0
  92. package/src/llama.cpp/examples/main/main.cpp +957 -0
  93. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +33 -0
  94. package/src/llama.cpp/examples/parallel/CMakeLists.txt +5 -0
  95. package/src/llama.cpp/examples/parallel/parallel.cpp +427 -0
  96. package/src/llama.cpp/examples/passkey/CMakeLists.txt +5 -0
  97. package/src/llama.cpp/examples/passkey/passkey.cpp +302 -0
  98. package/src/llama.cpp/examples/perplexity/CMakeLists.txt +5 -0
  99. package/src/llama.cpp/examples/perplexity/perplexity.cpp +1943 -0
  100. package/src/llama.cpp/examples/quantize/CMakeLists.txt +6 -0
  101. package/src/llama.cpp/examples/quantize/quantize.cpp +423 -0
  102. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +6 -0
  103. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +424 -0
  104. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +5 -0
  105. package/src/llama.cpp/examples/retrieval/retrieval.cpp +350 -0
  106. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +5 -0
  107. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +246 -0
  108. package/src/llama.cpp/examples/server/CMakeLists.txt +40 -0
  109. package/src/llama.cpp/examples/server/bench/requirements.txt +2 -0
  110. package/src/llama.cpp/examples/server/httplib.h +9465 -0
  111. package/src/llama.cpp/examples/server/server.cpp +3826 -0
  112. package/src/llama.cpp/examples/server/tests/requirements.txt +6 -0
  113. package/src/llama.cpp/examples/server/utils.hpp +653 -0
  114. package/src/llama.cpp/examples/simple/CMakeLists.txt +5 -0
  115. package/src/llama.cpp/examples/simple/simple.cpp +183 -0
  116. package/src/llama.cpp/examples/speculative/CMakeLists.txt +5 -0
  117. package/src/llama.cpp/examples/speculative/speculative.cpp +614 -0
  118. package/src/llama.cpp/examples/sycl/CMakeLists.txt +9 -0
  119. package/src/llama.cpp/examples/sycl/ls-sycl-device.cpp +13 -0
  120. package/src/llama.cpp/examples/tokenize/CMakeLists.txt +5 -0
  121. package/src/llama.cpp/examples/tokenize/tokenize.cpp +42 -0
  122. package/src/llama.cpp/examples/train-text-from-scratch/CMakeLists.txt +5 -0
  123. package/src/llama.cpp/examples/train-text-from-scratch/train-text-from-scratch.cpp +1252 -0
  124. package/src/llama.cpp/ggml-alloc.c +985 -0
  125. package/src/llama.cpp/ggml-alloc.h +76 -0
  126. package/src/llama.cpp/ggml-backend-impl.h +141 -0
  127. package/src/llama.cpp/ggml-backend.c +2099 -0
  128. package/src/llama.cpp/ggml-backend.h +233 -0
  129. package/src/llama.cpp/ggml-common.h +1853 -0
  130. package/src/llama.cpp/ggml-cuda.h +43 -0
  131. package/src/llama.cpp/ggml-impl.h +265 -0
  132. package/src/llama.cpp/ggml-kompute.cpp +2006 -0
  133. package/src/llama.cpp/ggml-kompute.h +46 -0
  134. package/src/llama.cpp/ggml-metal.h +66 -0
  135. package/src/llama.cpp/ggml-mpi.c +216 -0
  136. package/src/llama.cpp/ggml-mpi.h +39 -0
  137. package/src/llama.cpp/ggml-opencl.cpp +2301 -0
  138. package/src/llama.cpp/ggml-opencl.h +36 -0
  139. package/src/llama.cpp/ggml-quants.c +12678 -0
  140. package/src/llama.cpp/ggml-quants.h +133 -0
  141. package/src/llama.cpp/ggml-sycl.cpp +17882 -0
  142. package/src/llama.cpp/ggml-sycl.h +49 -0
  143. package/src/llama.cpp/ggml-vulkan-shaders.hpp +69849 -0
  144. package/src/llama.cpp/ggml-vulkan.cpp +6442 -0
  145. package/src/llama.cpp/ggml-vulkan.h +29 -0
  146. package/src/llama.cpp/ggml.c +21819 -0
  147. package/src/llama.cpp/ggml.h +2403 -0
  148. package/src/llama.cpp/llama.cpp +17468 -0
  149. package/src/llama.cpp/llama.h +1117 -0
  150. package/src/llama.cpp/pocs/CMakeLists.txt +12 -0
  151. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +9 -0
  152. package/src/llama.cpp/pocs/vdot/q8dot.cpp +172 -0
  153. package/src/llama.cpp/pocs/vdot/vdot.cpp +310 -0
  154. package/src/llama.cpp/prompts/LLM-questions.txt +49 -0
  155. package/src/llama.cpp/prompts/alpaca.txt +1 -0
  156. package/src/llama.cpp/prompts/assistant.txt +31 -0
  157. package/src/llama.cpp/prompts/chat-with-baichuan.txt +4 -0
  158. package/src/llama.cpp/prompts/chat-with-bob.txt +7 -0
  159. package/src/llama.cpp/prompts/chat-with-qwen.txt +1 -0
  160. package/src/llama.cpp/prompts/chat-with-vicuna-v0.txt +7 -0
  161. package/src/llama.cpp/prompts/chat-with-vicuna-v1.txt +7 -0
  162. package/src/llama.cpp/prompts/chat.txt +28 -0
  163. package/src/llama.cpp/prompts/dan-modified.txt +1 -0
  164. package/src/llama.cpp/prompts/dan.txt +1 -0
  165. package/src/llama.cpp/prompts/mnemonics.txt +93 -0
  166. package/src/llama.cpp/prompts/parallel-questions.txt +43 -0
  167. package/src/llama.cpp/prompts/reason-act.txt +18 -0
  168. package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf.txt +3 -0
  169. package/src/llama.cpp/requirements/requirements-convert-llama-ggml-to-gguf.txt +1 -0
  170. package/src/llama.cpp/requirements/requirements-convert-lora-to-ggml.txt +2 -0
  171. package/src/llama.cpp/requirements/requirements-convert-persimmon-to-gguf.txt +2 -0
  172. package/src/llama.cpp/requirements/requirements-convert.txt +5 -0
  173. package/src/llama.cpp/requirements.txt +12 -0
  174. package/src/llama.cpp/scripts/gen-build-info-cpp.cmake +24 -0
  175. package/src/llama.cpp/scripts/xxd.cmake +16 -0
  176. package/src/llama.cpp/sgemm.cpp +999 -0
  177. package/src/llama.cpp/sgemm.h +12 -0
  178. package/src/llama.cpp/tests/CMakeLists.txt +78 -0
  179. package/src/llama.cpp/tests/get-model.cpp +21 -0
  180. package/src/llama.cpp/tests/get-model.h +2 -0
  181. package/src/llama.cpp/tests/test-autorelease.cpp +24 -0
  182. package/src/llama.cpp/tests/test-backend-ops.cpp +2266 -0
  183. package/src/llama.cpp/tests/test-c.c +7 -0
  184. package/src/llama.cpp/tests/test-chat-template.cpp +107 -0
  185. package/src/llama.cpp/tests/test-double-float.cpp +57 -0
  186. package/src/llama.cpp/tests/test-grad0.cpp +1606 -0
  187. package/src/llama.cpp/tests/test-grammar-integration.cpp +243 -0
  188. package/src/llama.cpp/tests/test-grammar-parser.cpp +250 -0
  189. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +899 -0
  190. package/src/llama.cpp/tests/test-llama-grammar.cpp +402 -0
  191. package/src/llama.cpp/tests/test-model-load-cancel.cpp +27 -0
  192. package/src/llama.cpp/tests/test-opt.cpp +181 -0
  193. package/src/llama.cpp/tests/test-quantize-fns.cpp +185 -0
  194. package/src/llama.cpp/tests/test-quantize-perf.cpp +363 -0
  195. package/src/llama.cpp/tests/test-rope.cpp +221 -0
  196. package/src/llama.cpp/tests/test-sampling.cpp +301 -0
  197. package/src/llama.cpp/tests/test-tokenizer-0-falcon.cpp +187 -0
  198. package/src/llama.cpp/tests/test-tokenizer-0-llama.cpp +190 -0
  199. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +123 -0
  200. package/src/llama.cpp/tests/test-tokenizer-1-llama.cpp +111 -0
  201. package/src/llama.cpp/unicode-data.cpp +1651 -0
  202. package/src/llama.cpp/unicode-data.h +16 -0
  203. package/src/llama.cpp/unicode.cpp +277 -0
  204. package/src/llama.cpp/unicode.h +28 -0
@@ -0,0 +1,50 @@
+
+ # For more information about using CMake with Android Studio, read the
+ # documentation: https://d.android.com/studio/projects/add-native-code.html.
+ # For more examples on how to use CMake, see https://github.com/android/ndk-samples.
+
+ # Sets the minimum CMake version required for this project.
+ cmake_minimum_required(VERSION 3.22.1)
+
+ # Declares the project name. The project name can be accessed via ${PROJECT_NAME}.
+ # Since this is the top-level CMakeLists.txt, the project name is also accessible
+ # with ${CMAKE_PROJECT_NAME} (both CMake variables are in sync within the top-level
+ # build script scope).
+ project("llama-android")
+
+ include(FetchContent)
+ FetchContent_Declare(
+         llama
+         GIT_REPOSITORY https://github.com/ggerganov/llama.cpp
+         GIT_TAG        master
+ )
+
+ # Also provides "common"
+ FetchContent_MakeAvailable(llama)
+
+ # Creates and names a library, sets it as either STATIC
+ # or SHARED, and provides the relative paths to its source code.
+ # You can define multiple libraries, and CMake builds them for you.
+ # Gradle automatically packages shared libraries with your APK.
+ #
+ # In this top-level CMakeLists.txt, ${CMAKE_PROJECT_NAME} is used to define
+ # the target library name; in the sub-module's CMakeLists.txt, ${PROJECT_NAME}
+ # is preferred for the same purpose.
+ #
+ # In order to load a library into your app from Java/Kotlin, you must call
+ # System.loadLibrary() and pass the name of the library defined here;
+ # for GameActivity/NativeActivity derived applications, the same library name must be
+ # used in the AndroidManifest.xml file.
+ add_library(${CMAKE_PROJECT_NAME} SHARED
+     # List C/C++ source files with relative paths to this CMakeLists.txt.
+     llama-android.cpp)
+
+ # Specifies libraries CMake should link to your target library. You
+ # can link libraries from various origins, such as libraries defined in this
+ # build script, prebuilt third-party libraries, or Android system libraries.
+ target_link_libraries(${CMAKE_PROJECT_NAME}
+     # List the libraries to link to the target library.
+     llama
+     common
+     android
+     log)
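
The comment block above spells out the host-side contract: the name passed to System.loadLibrary() must match the library target declared here via ${CMAKE_PROJECT_NAME}, i.e. "llama-android". A minimal Kotlin sketch of that load call, assuming a hypothetical MainActivity in the consuming app (not part of this package):

    // Hypothetical host activity; the argument must match project("llama-android") above.
    class MainActivity : android.app.Activity() {
        companion object {
            init {
                System.loadLibrary("llama-android") // loads libllama-android.so from the APK
            }
        }
    }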
@@ -0,0 +1,443 @@
+ #include <android/log.h>
+ #include <jni.h>
+ #include <algorithm> // std::max, std::min
+ #include <iomanip>
+ #include <math.h>
+ #include <sstream>   // std::stringstream
+ #include <string>
+ #include <unistd.h>
+ #include <vector>    // std::vector
+ #include "llama.h"
+ #include "common/common.h"
+
+ // Write C++ code here.
+ //
+ // Do not forget to dynamically load the C++ library into your application.
+ //
+ // For instance,
+ //
+ // In MainActivity.java:
+ //     static {
+ //         System.loadLibrary("llama-android");
+ //     }
+ //
+ // Or, in MainActivity.kt:
+ //     companion object {
+ //         init {
+ //             System.loadLibrary("llama-android")
+ //         }
+ //     }
+
+ #define TAG "llama-android.cpp"
+ #define LOGi(...) __android_log_print(ANDROID_LOG_INFO, TAG, __VA_ARGS__)
+ #define LOGe(...) __android_log_print(ANDROID_LOG_ERROR, TAG, __VA_ARGS__)
+
+ jclass la_int_var;
+ jmethodID la_int_var_value;
+ jmethodID la_int_var_inc;
+
+ std::string cached_token_chars;
+
+ bool is_valid_utf8(const char * string) {
+     if (!string) {
+         return true;
+     }
+
+     const unsigned char * bytes = (const unsigned char *)string;
+     int num;
+
+     while (*bytes != 0x00) {
+         if ((*bytes & 0x80) == 0x00) {
+             // U+0000 to U+007F
+             num = 1;
+         } else if ((*bytes & 0xE0) == 0xC0) {
+             // U+0080 to U+07FF
+             num = 2;
+         } else if ((*bytes & 0xF0) == 0xE0) {
+             // U+0800 to U+FFFF
+             num = 3;
+         } else if ((*bytes & 0xF8) == 0xF0) {
+             // U+10000 to U+10FFFF
+             num = 4;
+         } else {
+             return false;
+         }
+
+         bytes += 1;
+         for (int i = 1; i < num; ++i) {
+             if ((*bytes & 0xC0) != 0x80) {
+                 return false;
+             }
+             bytes += 1;
+         }
+     }
+
+     return true;
+ }
+
+ static void log_callback(ggml_log_level level, const char * fmt, void * data) {
+     // The callback receives pre-formatted text; log it through "%s" so that any
+     // '%' characters in the message are not re-interpreted as format specifiers.
+     (void) data;
+     if (level == GGML_LOG_LEVEL_ERROR)     __android_log_print(ANDROID_LOG_ERROR, TAG, "%s", fmt);
+     else if (level == GGML_LOG_LEVEL_INFO) __android_log_print(ANDROID_LOG_INFO, TAG, "%s", fmt);
+     else if (level == GGML_LOG_LEVEL_WARN) __android_log_print(ANDROID_LOG_WARN, TAG, "%s", fmt);
+     else __android_log_print(ANDROID_LOG_DEFAULT, TAG, "%s", fmt);
+ }
+
+ extern "C"
+ JNIEXPORT jlong JNICALL
+ Java_com_example_llama_Llm_load_1model(JNIEnv *env, jobject, jstring filename) {
+     llama_model_params model_params = llama_model_default_params();
+
+     auto path_to_model = env->GetStringUTFChars(filename, 0);
+     LOGi("Loading model from %s", path_to_model);
+
+     auto model = llama_load_model_from_file(path_to_model, model_params);
+     env->ReleaseStringUTFChars(filename, path_to_model);
+
+     if (!model) {
+         LOGe("load_model() failed");
+         env->ThrowNew(env->FindClass("java/lang/IllegalStateException"), "load_model() failed");
+         return 0;
+     }
+
+     return reinterpret_cast<jlong>(model);
+ }
+
+ extern "C"
+ JNIEXPORT void JNICALL
+ Java_com_example_llama_Llm_free_1model(JNIEnv *, jobject, jlong model) {
+     llama_free_model(reinterpret_cast<llama_model *>(model));
+ }
+
+ extern "C"
+ JNIEXPORT jlong JNICALL
+ Java_com_example_llama_Llm_new_1context(JNIEnv *env, jobject, jlong jmodel) {
+     auto model = reinterpret_cast<llama_model *>(jmodel);
+
+     if (!model) {
+         LOGe("new_context(): model cannot be null");
+         env->ThrowNew(env->FindClass("java/lang/IllegalArgumentException"), "Model cannot be null");
+         return 0;
+     }
+
+     int n_threads = std::max(1, std::min(8, (int) sysconf(_SC_NPROCESSORS_ONLN) - 2));
+     LOGi("Using %d threads", n_threads);
+
+     llama_context_params ctx_params = llama_context_default_params();
+     ctx_params.seed            = 1234;
+     ctx_params.n_ctx           = 2048;
+     ctx_params.n_threads       = n_threads;
+     ctx_params.n_threads_batch = n_threads;
+
+     llama_context * context = llama_new_context_with_model(model, ctx_params);
+
+     if (!context) {
+         LOGe("llama_new_context_with_model() returned null");
+         env->ThrowNew(env->FindClass("java/lang/IllegalStateException"),
+                       "llama_new_context_with_model() returned null");
+         return 0;
+     }
+
+     return reinterpret_cast<jlong>(context);
+ }
+
+ extern "C"
+ JNIEXPORT void JNICALL
+ Java_com_example_llama_Llm_free_1context(JNIEnv *, jobject, jlong context) {
+     llama_free(reinterpret_cast<llama_context *>(context));
+ }
+
+ extern "C"
+ JNIEXPORT void JNICALL
+ Java_com_example_llama_Llm_backend_1free(JNIEnv *, jobject) {
+     llama_backend_free();
+ }
+
+ extern "C"
+ JNIEXPORT void JNICALL
+ Java_com_example_llama_Llm_log_1to_1android(JNIEnv *, jobject) {
+     llama_log_set(log_callback, NULL);
+ }
+
+ extern "C"
+ JNIEXPORT jstring JNICALL
+ Java_com_example_llama_Llm_bench_1model(
+         JNIEnv *env,
+         jobject,
+         jlong context_pointer,
+         jlong model_pointer,
+         jlong batch_pointer,
+         jint pp,
+         jint tg,
+         jint pl,
+         jint nr
+ ) {
+     auto pp_avg = 0.0;
+     auto tg_avg = 0.0;
+     auto pp_std = 0.0;
+     auto tg_std = 0.0;
+
+     const auto context = reinterpret_cast<llama_context *>(context_pointer);
+     const auto model   = reinterpret_cast<llama_model *>(model_pointer);
+     const auto batch   = reinterpret_cast<llama_batch *>(batch_pointer);
+
+     const int n_ctx = llama_n_ctx(context);
+
+     LOGi("n_ctx = %d", n_ctx);
+
+     int i, j;
+     int nri;
+     for (nri = 0; nri < nr; nri++) {
+         LOGi("Benchmark prompt processing (pp)");
+
+         llama_batch_clear(*batch);
+
+         const int n_tokens = pp;
+         for (i = 0; i < n_tokens; i++) {
+             llama_batch_add(*batch, 0, i, { 0 }, false);
+         }
+
+         batch->logits[batch->n_tokens - 1] = true;
+         llama_kv_cache_clear(context);
+
+         const auto t_pp_start = ggml_time_us();
+         if (llama_decode(context, *batch) != 0) {
+             LOGe("llama_decode() failed during prompt processing");
+         }
+         const auto t_pp_end = ggml_time_us();
+
+         // bench text generation
+
+         LOGi("Benchmark text generation (tg)");
+
+         llama_kv_cache_clear(context);
+         const auto t_tg_start = ggml_time_us();
+         for (i = 0; i < tg; i++) {
+             llama_batch_clear(*batch);
+             for (j = 0; j < pl; j++) {
+                 llama_batch_add(*batch, 0, i, { j }, true);
+             }
+
+             LOGi("llama_decode() text generation: %d", i);
+             if (llama_decode(context, *batch) != 0) {
+                 LOGe("llama_decode() failed during text generation");
+             }
+         }
+         const auto t_tg_end = ggml_time_us();
+
+         llama_kv_cache_clear(context);
+
+         const auto t_pp = double(t_pp_end - t_pp_start) / 1000000.0;
+         const auto t_tg = double(t_tg_end - t_tg_start) / 1000000.0;
+
+         const auto speed_pp = double(pp) / t_pp;
+         const auto speed_tg = double(pl * tg) / t_tg;
+
+         pp_avg += speed_pp;
+         tg_avg += speed_tg;
+
+         pp_std += speed_pp * speed_pp;
+         tg_std += speed_tg * speed_tg;
+
+         LOGi("pp %f t/s, tg %f t/s", speed_pp, speed_tg);
+     }
+
+     pp_avg /= double(nr);
+     tg_avg /= double(nr);
+
+     if (nr > 1) {
+         pp_std = sqrt(pp_std / double(nr - 1) - pp_avg * pp_avg * double(nr) / double(nr - 1));
+         tg_std = sqrt(tg_std / double(nr - 1) - tg_avg * tg_avg * double(nr) / double(nr - 1));
+     } else {
+         pp_std = 0;
+         tg_std = 0;
+     }
+
+     char model_desc[128];
+     llama_model_desc(model, model_desc, sizeof(model_desc));
+
+     const auto model_size     = double(llama_model_size(model)) / 1024.0 / 1024.0 / 1024.0;
+     const auto model_n_params = double(llama_model_n_params(model)) / 1e9;
+
+     const auto backend = "(Android)"; // TODO: What should this be?
+
+     std::stringstream result;
+     result << std::setprecision(2);
+     result << "| model | size | params | backend | test | t/s |\n";
+     result << "| --- | --- | --- | --- | --- | --- |\n";
+     result << "| " << model_desc << " | " << model_size << "GiB | " << model_n_params << "B | " << backend << " | pp " << pp << " | " << pp_avg << " ± " << pp_std << " |\n";
+     result << "| " << model_desc << " | " << model_size << "GiB | " << model_n_params << "B | " << backend << " | tg " << tg << " | " << tg_avg << " ± " << tg_std << " |\n";
+
+     return env->NewStringUTF(result.str().c_str());
+ }
+
+ extern "C"
+ JNIEXPORT void JNICALL
+ Java_com_example_llama_Llm_free_1batch(JNIEnv *, jobject, jlong batch_pointer) {
+     llama_batch_free(*reinterpret_cast<llama_batch *>(batch_pointer));
+ }
+
+ extern "C"
+ JNIEXPORT jlong JNICALL
+ Java_com_example_llama_Llm_new_1batch(JNIEnv *, jobject, jint n_tokens, jint embd, jint n_seq_max) {
+
+     // Source: Copy of llama.cpp:llama_batch_init but heap-allocated.
+
+     llama_batch *batch = new llama_batch {
+         0,
+         nullptr,
+         nullptr,
+         nullptr,
+         nullptr,
+         nullptr,
+         nullptr,
+         0,
+         0,
+         0,
+     };
+
+     if (embd) {
+         batch->embd = (float *) malloc(sizeof(float) * n_tokens * embd);
+     } else {
+         batch->token = (llama_token *) malloc(sizeof(llama_token) * n_tokens);
+     }
+
+     batch->pos      = (llama_pos *)     malloc(sizeof(llama_pos)      * n_tokens);
+     batch->n_seq_id = (int32_t *)       malloc(sizeof(int32_t)        * n_tokens);
+     batch->seq_id   = (llama_seq_id **) malloc(sizeof(llama_seq_id *) * n_tokens);
+     for (int i = 0; i < n_tokens; ++i) {
+         batch->seq_id[i] = (llama_seq_id *) malloc(sizeof(llama_seq_id) * n_seq_max);
+     }
+     batch->logits = (int8_t *) malloc(sizeof(int8_t) * n_tokens);
+
+     return reinterpret_cast<jlong>(batch);
+ }
+
+ extern "C"
+ JNIEXPORT void JNICALL
+ Java_com_example_llama_Llm_backend_1init(JNIEnv *, jobject) {
+     llama_backend_init();
+ }
+
+ extern "C"
+ JNIEXPORT jstring JNICALL
+ Java_com_example_llama_Llm_system_1info(JNIEnv *env, jobject) {
+     return env->NewStringUTF(llama_print_system_info());
+ }
+
+ extern "C"
+ JNIEXPORT jint JNICALL
+ Java_com_example_llama_Llm_completion_1init(
+         JNIEnv *env,
+         jobject,
+         jlong context_pointer,
+         jlong batch_pointer,
+         jstring jtext,
+         jint n_len
+ ) {
+     cached_token_chars.clear();
+
+     const auto text    = env->GetStringUTFChars(jtext, 0);
+     const auto context = reinterpret_cast<llama_context *>(context_pointer);
+     const auto batch   = reinterpret_cast<llama_batch *>(batch_pointer);
+
+     const auto tokens_list = llama_tokenize(context, text, 1);
+
+     auto n_ctx = llama_n_ctx(context);
+     auto n_kv_req = tokens_list.size() + (n_len - tokens_list.size());
+
+     LOGi("n_len = %d, n_ctx = %d, n_kv_req = %d", n_len, n_ctx, (int) n_kv_req);
+
+     if (n_kv_req > n_ctx) {
+         LOGe("error: n_kv_req > n_ctx, the required KV cache size is not big enough");
+     }
+
+     for (auto id : tokens_list) {
+         LOGi("%s", llama_token_to_piece(context, id).c_str());
+     }
+
+     llama_batch_clear(*batch);
+
+     // evaluate the initial prompt
+     for (size_t i = 0; i < tokens_list.size(); i++) {
+         llama_batch_add(*batch, tokens_list[i], i, { 0 }, false);
+     }
+
+     // llama_decode will output logits only for the last token of the prompt
+     batch->logits[batch->n_tokens - 1] = true;
+
+     if (llama_decode(context, *batch) != 0) {
+         LOGe("llama_decode() failed");
+     }
+
+     env->ReleaseStringUTFChars(jtext, text);
+
+     return batch->n_tokens;
+ }
+
+ extern "C"
+ JNIEXPORT jstring JNICALL
+ Java_com_example_llama_Llm_completion_1loop(
+         JNIEnv * env,
+         jobject,
+         jlong context_pointer,
+         jlong batch_pointer,
+         jint n_len,
+         jobject intvar_ncur
+ ) {
+     const auto context = reinterpret_cast<llama_context *>(context_pointer);
+     const auto batch   = reinterpret_cast<llama_batch *>(batch_pointer);
+     const auto model   = llama_get_model(context);
+
+     // Cache the IntVar class as a global reference so it stays valid across JNI calls.
+     if (!la_int_var) la_int_var = reinterpret_cast<jclass>(env->NewGlobalRef(env->GetObjectClass(intvar_ncur)));
+     if (!la_int_var_value) la_int_var_value = env->GetMethodID(la_int_var, "getValue", "()I");
+     if (!la_int_var_inc) la_int_var_inc = env->GetMethodID(la_int_var, "inc", "()V");
+
+     auto n_vocab = llama_n_vocab(model);
+     auto logits  = llama_get_logits_ith(context, batch->n_tokens - 1);
+
+     std::vector<llama_token_data> candidates;
+     candidates.reserve(n_vocab);
+
+     for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
+         candidates.emplace_back(llama_token_data{ token_id, logits[token_id], 0.0f });
+     }
+
+     llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
+
+     // sample the most likely token
+     const auto new_token_id = llama_sample_token_greedy(context, &candidates_p);
+
+     const auto n_cur = env->CallIntMethod(intvar_ncur, la_int_var_value);
+     if (llama_token_is_eog(model, new_token_id) || n_cur == n_len) {
+         return env->NewStringUTF("");
+     }
+
+     auto new_token_chars = llama_token_to_piece(context, new_token_id);
+     cached_token_chars += new_token_chars;
+
+     jstring new_token = nullptr;
+     if (is_valid_utf8(cached_token_chars.c_str())) {
+         new_token = env->NewStringUTF(cached_token_chars.c_str());
+         LOGi("cached: %s, new_token_chars: `%s`, id: %d", cached_token_chars.c_str(), new_token_chars.c_str(), new_token_id);
+         cached_token_chars.clear();
+     } else {
+         new_token = env->NewStringUTF("");
+     }
+
+     llama_batch_clear(*batch);
+     llama_batch_add(*batch, new_token_id, n_cur, { 0 }, true);
+
+     env->CallVoidMethod(intvar_ncur, la_int_var_inc);
+
+     if (llama_decode(context, *batch) != 0) {
+         LOGe("llama_decode() failed");
+     }
+
+     return new_token;
+ }
+
+ extern "C"
+ JNIEXPORT void JNICALL
+ Java_com_example_llama_Llm_kv_1cache_1clear(JNIEnv *, jobject, jlong context) {
+     llama_kv_cache_clear(reinterpret_cast<llama_context *>(context));
+ }
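
The exported functions above follow JNI name mangling (Java_<package>_<class>_<method>, with "_1" decoding to a literal underscore), so they bind to instance methods of a Kotlin class com.example.llama.Llm. As a rough illustration reconstructed from the export names (this Kotlin file is not shipped in the package, and the real app-side class may differ), the bindings could be declared like this:

    // Hypothetical app-side bindings reconstructed from the JNI exports above.
    package com.example.llama

    // Mutable counter passed to completion_loop(); the native side looks up and
    // calls getValue()I and inc()V on it (see la_int_var_value / la_int_var_inc).
    class IntVar(private var v: Int) {
        fun getValue(): Int = v
        fun inc() { v += 1 }
    }

    class Llm {
        companion object {
            init { System.loadLibrary("llama-android") }
        }

        external fun backend_init()
        external fun backend_free()
        external fun log_to_android()
        external fun system_info(): String
        external fun load_model(filename: String): Long
        external fun free_model(model: Long)
        external fun new_context(model: Long): Long
        external fun free_context(context: Long)
        external fun new_batch(nTokens: Int, embd: Int, nSeqMax: Int): Long
        external fun free_batch(batch: Long)
        external fun bench_model(context: Long, model: Long, batch: Long, pp: Int, tg: Int, pl: Int, nr: Int): String
        external fun completion_init(context: Long, batch: Long, text: String, nLen: Int): Int
        external fun completion_loop(context: Long, batch: Long, nLen: Int, ncur: IntVar): String
        external fun kv_cache_clear(context: Long)
    }

A typical generation loop would call completion_init() once with the prompt, then call completion_loop() repeatedly with a shared IntVar, appending each returned piece until nLen tokens have been produced or end of generation is reached.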
@@ -0,0 +1,37 @@
+ add_library(llava OBJECT
+             llava.cpp
+             llava.h
+             clip.cpp
+             clip.h
+             )
+
+ target_link_libraries(llava PRIVATE ggml llama ${CMAKE_THREAD_LIBS_INIT})
+
+ target_include_directories(llava PUBLIC .)
+ target_include_directories(llava PUBLIC ../..)
+ target_include_directories(llava PUBLIC ../../common)
+
+ target_compile_features(llava PRIVATE cxx_std_11)
+
+ add_library(llava_static STATIC $<TARGET_OBJECTS:llava>)
+ if (BUILD_SHARED_LIBS)
+     set_target_properties(llava PROPERTIES POSITION_INDEPENDENT_CODE ON)
+     target_compile_definitions(llava PRIVATE LLAMA_SHARED LLAMA_BUILD)
+     add_library(llava_shared SHARED $<TARGET_OBJECTS:llava>)
+     target_link_libraries(llava_shared PRIVATE ggml llama ${CMAKE_THREAD_LIBS_INIT})
+     install(TARGETS llava_shared LIBRARY)
+ endif()
+
+ if (NOT MSVC)
+     target_compile_options(llava PRIVATE -Wno-cast-qual) # stb_image.h
+ endif()
+
+ if (TARGET BUILD_INFO)
+     add_dependencies(llava BUILD_INFO)
+ endif()
+
+ set(TARGET llava-cli)
+ add_executable(llava-cli llava-cli.cpp)
+ install(TARGETS llava-cli RUNTIME)
+ target_link_libraries(llava-cli PRIVATE common llava ${CMAKE_THREAD_LIBS_INIT})
+ target_compile_features(llava-cli PRIVATE cxx_std_11)