cui-llama.rn 1.4.3 → 1.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134)
  1. package/README.md +93 -114
  2. package/android/src/main/CMakeLists.txt +5 -0
  3. package/android/src/main/java/com/rnllama/LlamaContext.java +91 -17
  4. package/android/src/main/java/com/rnllama/RNLlama.java +37 -4
  5. package/android/src/main/jni-utils.h +6 -0
  6. package/android/src/main/jni.cpp +289 -31
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  10. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  11. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  12. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  13. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  14. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  15. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +7 -2
  16. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +7 -2
  17. package/cpp/chat-template.hpp +529 -0
  18. package/cpp/chat.cpp +1779 -0
  19. package/cpp/chat.h +135 -0
  20. package/cpp/common.cpp +2064 -1873
  21. package/cpp/common.h +700 -699
  22. package/cpp/ggml-alloc.c +1039 -1042
  23. package/cpp/ggml-alloc.h +1 -1
  24. package/cpp/ggml-backend-impl.h +255 -255
  25. package/cpp/ggml-backend-reg.cpp +586 -582
  26. package/cpp/ggml-backend.cpp +2004 -2002
  27. package/cpp/ggml-backend.h +354 -354
  28. package/cpp/ggml-common.h +1851 -1853
  29. package/cpp/ggml-cpp.h +39 -39
  30. package/cpp/ggml-cpu-aarch64.cpp +4248 -4247
  31. package/cpp/ggml-cpu-aarch64.h +8 -8
  32. package/cpp/ggml-cpu-impl.h +531 -386
  33. package/cpp/ggml-cpu-quants.c +12527 -10920
  34. package/cpp/ggml-cpu-traits.cpp +36 -36
  35. package/cpp/ggml-cpu-traits.h +38 -38
  36. package/cpp/ggml-cpu.c +15766 -14391
  37. package/cpp/ggml-cpu.cpp +655 -635
  38. package/cpp/ggml-cpu.h +138 -135
  39. package/cpp/ggml-impl.h +567 -567
  40. package/cpp/ggml-metal-impl.h +235 -0
  41. package/cpp/ggml-metal.h +1 -1
  42. package/cpp/ggml-metal.m +5146 -4884
  43. package/cpp/ggml-opt.cpp +854 -854
  44. package/cpp/ggml-opt.h +216 -216
  45. package/cpp/ggml-quants.c +5238 -5238
  46. package/cpp/ggml-threading.h +14 -14
  47. package/cpp/ggml.c +6529 -6514
  48. package/cpp/ggml.h +2198 -2194
  49. package/cpp/gguf.cpp +1329 -1329
  50. package/cpp/gguf.h +202 -202
  51. package/cpp/json-schema-to-grammar.cpp +1024 -1045
  52. package/cpp/json-schema-to-grammar.h +21 -8
  53. package/cpp/json.hpp +24766 -24766
  54. package/cpp/llama-adapter.cpp +347 -347
  55. package/cpp/llama-adapter.h +74 -74
  56. package/cpp/llama-arch.cpp +1513 -1487
  57. package/cpp/llama-arch.h +403 -400
  58. package/cpp/llama-batch.cpp +368 -368
  59. package/cpp/llama-batch.h +88 -88
  60. package/cpp/llama-chat.cpp +588 -578
  61. package/cpp/llama-chat.h +53 -52
  62. package/cpp/llama-context.cpp +1775 -1775
  63. package/cpp/llama-context.h +128 -128
  64. package/cpp/llama-cparams.cpp +1 -1
  65. package/cpp/llama-cparams.h +37 -37
  66. package/cpp/llama-cpp.h +30 -30
  67. package/cpp/llama-grammar.cpp +1219 -1139
  68. package/cpp/llama-grammar.h +173 -143
  69. package/cpp/llama-hparams.cpp +71 -71
  70. package/cpp/llama-hparams.h +139 -139
  71. package/cpp/llama-impl.cpp +167 -167
  72. package/cpp/llama-impl.h +61 -61
  73. package/cpp/llama-kv-cache.cpp +718 -718
  74. package/cpp/llama-kv-cache.h +219 -218
  75. package/cpp/llama-mmap.cpp +600 -590
  76. package/cpp/llama-mmap.h +68 -67
  77. package/cpp/llama-model-loader.cpp +1124 -1124
  78. package/cpp/llama-model-loader.h +167 -167
  79. package/cpp/llama-model.cpp +4087 -3997
  80. package/cpp/llama-model.h +370 -370
  81. package/cpp/llama-sampling.cpp +2558 -2408
  82. package/cpp/llama-sampling.h +32 -32
  83. package/cpp/llama-vocab.cpp +3264 -3247
  84. package/cpp/llama-vocab.h +125 -125
  85. package/cpp/llama.cpp +10284 -10077
  86. package/cpp/llama.h +1354 -1323
  87. package/cpp/log.cpp +393 -401
  88. package/cpp/log.h +132 -121
  89. package/cpp/minja/chat-template.hpp +529 -0
  90. package/cpp/minja/minja.hpp +2915 -0
  91. package/cpp/minja.hpp +2915 -0
  92. package/cpp/rn-llama.cpp +66 -6
  93. package/cpp/rn-llama.h +26 -1
  94. package/cpp/sampling.cpp +570 -505
  95. package/cpp/sampling.h +3 -0
  96. package/cpp/sgemm.cpp +2598 -2597
  97. package/cpp/sgemm.h +14 -14
  98. package/cpp/speculative.cpp +278 -277
  99. package/cpp/speculative.h +28 -28
  100. package/cpp/unicode.cpp +9 -2
  101. package/ios/CMakeLists.txt +6 -0
  102. package/ios/RNLlama.h +0 -8
  103. package/ios/RNLlama.mm +27 -3
  104. package/ios/RNLlamaContext.h +10 -1
  105. package/ios/RNLlamaContext.mm +269 -57
  106. package/jest/mock.js +21 -2
  107. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  108. package/lib/commonjs/grammar.js +3 -0
  109. package/lib/commonjs/grammar.js.map +1 -1
  110. package/lib/commonjs/index.js +87 -13
  111. package/lib/commonjs/index.js.map +1 -1
  112. package/lib/module/NativeRNLlama.js.map +1 -1
  113. package/lib/module/grammar.js +3 -0
  114. package/lib/module/grammar.js.map +1 -1
  115. package/lib/module/index.js +86 -13
  116. package/lib/module/index.js.map +1 -1
  117. package/lib/typescript/NativeRNLlama.d.ts +107 -2
  118. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  119. package/lib/typescript/grammar.d.ts.map +1 -1
  120. package/lib/typescript/index.d.ts +32 -7
  121. package/lib/typescript/index.d.ts.map +1 -1
  122. package/llama-rn.podspec +1 -1
  123. package/package.json +3 -2
  124. package/src/NativeRNLlama.ts +115 -3
  125. package/src/grammar.ts +3 -0
  126. package/src/index.ts +138 -21
  127. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCCompiler.cmake +0 -81
  128. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeSystem.cmake +0 -15
  129. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.c +0 -904
  130. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.o +0 -0
  131. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.cpp +0 -919
  132. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.o +0 -0
  133. package/android/src/main/build-arm64/CMakeFiles/CMakeConfigureLog.yaml +0 -55
  134. package/cpp/rn-llama.hpp +0 -913
package/cpp/rn-llama.cpp CHANGED
@@ -194,6 +194,8 @@ bool llama_rn_context::loadModel(common_params &params_)
         LOG_ERROR("unable to load model: %s", params_.model.c_str());
         return false;
     }
+
+    templates = common_chat_templates_init(model, params.chat_template);
     n_ctx = llama_n_ctx(ctx);
 
     // We can uncomment for debugging or after this fix: https://github.com/ggerganov/llama.cpp/pull/11101
@@ -202,11 +204,70 @@ bool llama_rn_context::loadModel(common_params &params_)
     return true;
 }
 
-bool llama_rn_context::validateModelChatTemplate() const {
-    const char * tmpl = llama_model_chat_template(model);
-    llama_chat_message chat[] = {{"user", "test"}};
-    int32_t chat_res = llama_chat_apply_template(tmpl, chat, 1, true, nullptr, 0);
-    return chat_res > 0;
+bool llama_rn_context::validateModelChatTemplate(bool use_jinja, const char *name) const {
+    const char * tmpl = llama_model_chat_template(model, name);
+    if (tmpl == nullptr) {
+        return false;
+    }
+    return common_chat_verify_template(tmpl, use_jinja);
+}
+
+common_chat_params llama_rn_context::getFormattedChatWithJinja(
+    const std::string &messages,
+    const std::string &chat_template,
+    const std::string &json_schema,
+    const std::string &tools,
+    const bool &parallel_tool_calls,
+    const std::string &tool_choice
+) const {
+    common_chat_templates_inputs inputs;
+    inputs.messages = common_chat_msgs_parse_oaicompat(json::parse(messages));
+    auto useTools = !tools.empty();
+    if (useTools) {
+        inputs.tools = common_chat_tools_parse_oaicompat(json::parse(tools));
+    }
+    inputs.parallel_tool_calls = parallel_tool_calls;
+    if (!tool_choice.empty()) {
+        inputs.tool_choice = common_chat_tool_choice_parse_oaicompat(tool_choice);
+    }
+
+    if (!json_schema.empty()) {
+        inputs.json_schema = json_schema;
+    }
+    inputs.extract_reasoning = params.reasoning_format != COMMON_REASONING_FORMAT_NONE;
+
+    // If chat_template is provided, create new one and use it (probably slow)
+    if (!chat_template.empty()) {
+        auto tmp = common_chat_templates_init(model, chat_template);
+        return common_chat_templates_apply(tmp.get(), inputs);
+    } else {
+        return common_chat_templates_apply(templates.get(), inputs);
+    }
+}
+
+std::string llama_rn_context::getFormattedChat(
+    const std::string &messages,
+    const std::string &chat_template
+) const {
+    auto chat_json = json::parse(messages);
+    common_chat_templates_inputs inputs;
+    // Handle regular chat without tools
+    std::vector<common_chat_msg> chat_msgs;
+    for (const auto &msg : chat_json) {
+        chat_msgs.push_back({
+            msg["role"].get<std::string>(),
+            msg["content"].get<std::string>()
+        });
+    }
+    inputs.messages = chat_msgs;
+
+    // If chat_template is provided, create new one and use it (probably slow)
+    if (!chat_template.empty()) {
+        auto tmp = common_chat_templates_init(model, chat_template);
+        return common_chat_templates_apply(tmp.get(), inputs).prompt;
+    } else {
+        return common_chat_templates_apply(templates.get(), inputs).prompt;
+    }
 }
 
 void llama_rn_context::truncatePrompt(std::vector<llama_token> &prompt_tokens) {
@@ -518,7 +579,6 @@ std::vector<float> llama_rn_context::getEmbedding(common_params &embd_params)
     float *data;
 
     const enum llama_pooling_type pooling_type = llama_pooling_type(ctx);
-    printf("pooling_type: %d\n", pooling_type);
    if (pooling_type == LLAMA_POOLING_TYPE_NONE) {
         data = llama_get_embeddings(ctx);
     } else {
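
Note: the new getFormattedChatWithJinja entry point added above takes OpenAI-compatible JSON strings for messages and tools and returns a common_chat_params carrying the rendered prompt plus any grammar/tool-call metadata. The following is a rough caller-side sketch only, not code from this package; the include path, the helper name build_prompt, and the example tool definition are assumptions for illustration.

// Hypothetical caller sketch: driving the new Jinja-aware formatter once a
// model has been loaded into an rnllama::llama_rn_context.
#include <string>
#include "rn-llama.h"   // assumed include path for llama_rn_context

std::string build_prompt(rnllama::llama_rn_context &ctx) {
    // Messages and tools are passed as OpenAI-compatible JSON strings; the
    // context parses them with common_chat_msgs_parse_oaicompat /
    // common_chat_tools_parse_oaicompat internally.
    const std::string messages = R"([
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user",   "content": "What is the weather in Tokyo?"}
    ])";
    const std::string tools = R"([{
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Look up current weather",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"]
            }
        }
    }])";

    // Empty chat_template / json_schema fall back to the templates cached at
    // loadModel(); "auto" lets the template decide whether to call a tool.
    common_chat_params chat_params = ctx.getFormattedChatWithJinja(
        messages,
        /* chat_template = */ "",
        /* json_schema   = */ "",
        tools,
        /* parallel_tool_calls = */ false,
        /* tool_choice = */ "auto");

    // chat_params carries the rendered prompt; grammar/trigger fields are
    // used downstream for tool-call constrained sampling.
    return chat_params.prompt;
}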
package/cpp/rn-llama.h CHANGED
@@ -3,7 +3,9 @@
 
 #include <sstream>
 #include <iostream>
+#include "chat-template.hpp"
 #include "common.h"
+#include "chat.h"
 #include "ggml.h"
 #include "gguf.h"
 #include "llama.h"
@@ -13,8 +15,18 @@
 #include <android/log.h>
 #endif
 
+using json = nlohmann::ordered_json;
+typedef minja::chat_template common_chat_template;
+
+struct common_chat_templates {
+    bool has_explicit_template;
+    std::unique_ptr<common_chat_template> template_default;
+    std::unique_ptr<common_chat_template> template_tool_use;
+};
+
 namespace rnllama {
 
+
 std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token);
 
 std::string tokens_to_str(llama_context *ctx, const std::vector<llama_token>::const_iterator begin, const std::vector<llama_token>::const_iterator end);
@@ -63,6 +75,7 @@ struct llama_rn_context {
 
     llama_context *ctx = nullptr;
     common_sampler *ctx_sampling = nullptr;
+    common_chat_templates_ptr templates = nullptr;
 
     int n_ctx;
 
@@ -80,7 +93,19 @@ struct llama_rn_context {
     void rewind();
     bool initSampling();
     bool loadModel(common_params &params_);
-    bool validateModelChatTemplate() const;
+    bool validateModelChatTemplate(bool use_jinja, const char *name) const;
+    common_chat_params getFormattedChatWithJinja(
+        const std::string &messages,
+        const std::string &chat_template,
+        const std::string &json_schema,
+        const std::string &tools,
+        const bool &parallel_tool_calls,
+        const std::string &tool_choice
+    ) const;
+    std::string getFormattedChat(
+        const std::string &messages,
+        const std::string &chat_template
+    ) const;
     void truncatePrompt(std::vector<llama_token> &prompt_tokens);
     void loadPrompt();
     void beginCompletion();
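
Note: the header changes above extend validateModelChatTemplate with use_jinja/name arguments and expose a plain (non-Jinja) getFormattedChat alongside the Jinja path. The sketch below is a hypothetical illustration of how a caller might combine the two, assuming the same include path as before; the helper name format_or_fallback and the fallback behaviour are not part of this package.

// Hypothetical sketch: validate the model's built-in chat template and fall
// back to returning the raw messages JSON if it is unusable.
#include <string>
#include "rn-llama.h"   // assumed include path

std::string format_or_fallback(rnllama::llama_rn_context &ctx,
                               const std::string &messages_json) {
    // nullptr asks for the model's default template; use_jinja = false checks
    // it against the legacy (non-Jinja) renderer.
    const bool has_valid_template =
        ctx.validateModelChatTemplate(/* use_jinja = */ false, /* name = */ nullptr);

    if (!has_valid_template) {
        // Application-specific fallback; here we simply return the input.
        return messages_json;
    }

    // An empty chat_template string means "use the templates cached when the
    // model was loaded" rather than instantiating a new one per call.
    return ctx.getFormattedChat(messages_json, /* chat_template = */ "");
}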