cui-llama.rn 1.4.3 → 1.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134)
  1. package/README.md +93 -114
  2. package/android/src/main/CMakeLists.txt +5 -0
  3. package/android/src/main/java/com/rnllama/LlamaContext.java +91 -17
  4. package/android/src/main/java/com/rnllama/RNLlama.java +37 -4
  5. package/android/src/main/jni-utils.h +6 -0
  6. package/android/src/main/jni.cpp +289 -31
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  10. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  11. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  12. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  13. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  14. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  15. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +7 -2
  16. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +7 -2
  17. package/cpp/chat-template.hpp +529 -0
  18. package/cpp/chat.cpp +1779 -0
  19. package/cpp/chat.h +135 -0
  20. package/cpp/common.cpp +2064 -1873
  21. package/cpp/common.h +700 -699
  22. package/cpp/ggml-alloc.c +1039 -1042
  23. package/cpp/ggml-alloc.h +1 -1
  24. package/cpp/ggml-backend-impl.h +255 -255
  25. package/cpp/ggml-backend-reg.cpp +586 -582
  26. package/cpp/ggml-backend.cpp +2004 -2002
  27. package/cpp/ggml-backend.h +354 -354
  28. package/cpp/ggml-common.h +1851 -1853
  29. package/cpp/ggml-cpp.h +39 -39
  30. package/cpp/ggml-cpu-aarch64.cpp +4248 -4247
  31. package/cpp/ggml-cpu-aarch64.h +8 -8
  32. package/cpp/ggml-cpu-impl.h +531 -386
  33. package/cpp/ggml-cpu-quants.c +12527 -10920
  34. package/cpp/ggml-cpu-traits.cpp +36 -36
  35. package/cpp/ggml-cpu-traits.h +38 -38
  36. package/cpp/ggml-cpu.c +15766 -14391
  37. package/cpp/ggml-cpu.cpp +655 -635
  38. package/cpp/ggml-cpu.h +138 -135
  39. package/cpp/ggml-impl.h +567 -567
  40. package/cpp/ggml-metal-impl.h +235 -0
  41. package/cpp/ggml-metal.h +1 -1
  42. package/cpp/ggml-metal.m +5146 -4884
  43. package/cpp/ggml-opt.cpp +854 -854
  44. package/cpp/ggml-opt.h +216 -216
  45. package/cpp/ggml-quants.c +5238 -5238
  46. package/cpp/ggml-threading.h +14 -14
  47. package/cpp/ggml.c +6529 -6514
  48. package/cpp/ggml.h +2198 -2194
  49. package/cpp/gguf.cpp +1329 -1329
  50. package/cpp/gguf.h +202 -202
  51. package/cpp/json-schema-to-grammar.cpp +1024 -1045
  52. package/cpp/json-schema-to-grammar.h +21 -8
  53. package/cpp/json.hpp +24766 -24766
  54. package/cpp/llama-adapter.cpp +347 -347
  55. package/cpp/llama-adapter.h +74 -74
  56. package/cpp/llama-arch.cpp +1513 -1487
  57. package/cpp/llama-arch.h +403 -400
  58. package/cpp/llama-batch.cpp +368 -368
  59. package/cpp/llama-batch.h +88 -88
  60. package/cpp/llama-chat.cpp +588 -578
  61. package/cpp/llama-chat.h +53 -52
  62. package/cpp/llama-context.cpp +1775 -1775
  63. package/cpp/llama-context.h +128 -128
  64. package/cpp/llama-cparams.cpp +1 -1
  65. package/cpp/llama-cparams.h +37 -37
  66. package/cpp/llama-cpp.h +30 -30
  67. package/cpp/llama-grammar.cpp +1219 -1139
  68. package/cpp/llama-grammar.h +173 -143
  69. package/cpp/llama-hparams.cpp +71 -71
  70. package/cpp/llama-hparams.h +139 -139
  71. package/cpp/llama-impl.cpp +167 -167
  72. package/cpp/llama-impl.h +61 -61
  73. package/cpp/llama-kv-cache.cpp +718 -718
  74. package/cpp/llama-kv-cache.h +219 -218
  75. package/cpp/llama-mmap.cpp +600 -590
  76. package/cpp/llama-mmap.h +68 -67
  77. package/cpp/llama-model-loader.cpp +1124 -1124
  78. package/cpp/llama-model-loader.h +167 -167
  79. package/cpp/llama-model.cpp +4087 -3997
  80. package/cpp/llama-model.h +370 -370
  81. package/cpp/llama-sampling.cpp +2558 -2408
  82. package/cpp/llama-sampling.h +32 -32
  83. package/cpp/llama-vocab.cpp +3264 -3247
  84. package/cpp/llama-vocab.h +125 -125
  85. package/cpp/llama.cpp +10284 -10077
  86. package/cpp/llama.h +1354 -1323
  87. package/cpp/log.cpp +393 -401
  88. package/cpp/log.h +132 -121
  89. package/cpp/minja/chat-template.hpp +529 -0
  90. package/cpp/minja/minja.hpp +2915 -0
  91. package/cpp/minja.hpp +2915 -0
  92. package/cpp/rn-llama.cpp +66 -6
  93. package/cpp/rn-llama.h +26 -1
  94. package/cpp/sampling.cpp +570 -505
  95. package/cpp/sampling.h +3 -0
  96. package/cpp/sgemm.cpp +2598 -2597
  97. package/cpp/sgemm.h +14 -14
  98. package/cpp/speculative.cpp +278 -277
  99. package/cpp/speculative.h +28 -28
  100. package/cpp/unicode.cpp +9 -2
  101. package/ios/CMakeLists.txt +6 -0
  102. package/ios/RNLlama.h +0 -8
  103. package/ios/RNLlama.mm +27 -3
  104. package/ios/RNLlamaContext.h +10 -1
  105. package/ios/RNLlamaContext.mm +269 -57
  106. package/jest/mock.js +21 -2
  107. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  108. package/lib/commonjs/grammar.js +3 -0
  109. package/lib/commonjs/grammar.js.map +1 -1
  110. package/lib/commonjs/index.js +87 -13
  111. package/lib/commonjs/index.js.map +1 -1
  112. package/lib/module/NativeRNLlama.js.map +1 -1
  113. package/lib/module/grammar.js +3 -0
  114. package/lib/module/grammar.js.map +1 -1
  115. package/lib/module/index.js +86 -13
  116. package/lib/module/index.js.map +1 -1
  117. package/lib/typescript/NativeRNLlama.d.ts +107 -2
  118. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  119. package/lib/typescript/grammar.d.ts.map +1 -1
  120. package/lib/typescript/index.d.ts +32 -7
  121. package/lib/typescript/index.d.ts.map +1 -1
  122. package/llama-rn.podspec +1 -1
  123. package/package.json +3 -2
  124. package/src/NativeRNLlama.ts +115 -3
  125. package/src/grammar.ts +3 -0
  126. package/src/index.ts +138 -21
  127. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCCompiler.cmake +0 -81
  128. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeSystem.cmake +0 -15
  129. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.c +0 -904
  130. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.o +0 -0
  131. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.cpp +0 -919
  132. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.o +0 -0
  133. package/android/src/main/build-arm64/CMakeFiles/CMakeConfigureLog.yaml +0 -55
  134. package/cpp/rn-llama.hpp +0 -913
package/cpp/chat.h ADDED
@@ -0,0 +1,135 @@
1
+ // Chat support (incl. tool call grammar constraining & output parsing) w/ generic & custom template handlers.
2
+
3
+ #pragma once
4
+
5
+ #include "common.h"
6
+ #include <string>
7
+ #include <vector>
8
+
9
+ struct common_chat_templates; // Forward declaration only: this header refers to the type solely through pointers.
10
+
11
+ struct common_chat_tool_call { // One tool call; common_chat_msg::tool_calls holds a vector of these.
12
+ std::string name; // presumably the name of the tool being invoked — TODO confirm
13
+ std::string arguments; // presumably the call arguments serialized as JSON text — TODO confirm
14
+ std::string id; // presumably the identifier that common_chat_msg::tool_call_id refers back to — TODO confirm
15
+ };
16
+
17
+ struct common_chat_msg_content_part { // One typed piece of message content; common_chat_msg::content_parts holds a vector of these.
18
+ std::string type; // kind of the part as a string; the accepted values are not visible in this header
19
+ std::string text; // text payload of the part
20
+ };
21
+
22
+ struct common_chat_msg { // A single chat message; produced by common_chat_parse() and common_chat_msgs_parse_oaicompat().
23
+ std::string role;
24
+ std::string content; // message content as one plain string
25
+ std::vector<common_chat_msg_content_part> content_parts = {}; // content as typed parts; empty by default
26
+ std::vector<common_chat_tool_call> tool_calls = {}; // tool calls carried by this message; empty by default
27
+ std::string reasoning_content; // presumably the reasoning text separated out when extract_reasoning is enabled — TODO confirm
28
+ std::string tool_name; // NOTE(review): presumably set on tool-result messages — confirm
29
+ std::string tool_call_id; // NOTE(review): presumably matches common_chat_tool_call::id — confirm
30
+ };
31
+
32
+ struct common_chat_tool { // Description of one tool; produced by common_chat_tools_parse_oaicompat().
33
+ std::string name;
34
+ std::string description;
35
+ std::string parameters; // presumably a JSON schema serialized as text — TODO confirm
36
+ };
37
+
38
+ enum common_chat_tool_choice { // Tool-choice setting; common_chat_tool_choice_parse_oaicompat() produces one of these from a string.
39
+ COMMON_CHAT_TOOL_CHOICE_AUTO, // default value of common_chat_templates_inputs::tool_choice
40
+ COMMON_CHAT_TOOL_CHOICE_REQUIRED,
41
+ COMMON_CHAT_TOOL_CHOICE_NONE,
42
+ };
43
+
44
+ enum common_chat_format { // Identifies a chat format; stored in common_chat_params::format and passed to common_chat_parse().
45
+ COMMON_CHAT_FORMAT_CONTENT_ONLY, // default value of common_chat_params::format
46
+ COMMON_CHAT_FORMAT_GENERIC,
47
+ COMMON_CHAT_FORMAT_MISTRAL_NEMO,
48
+ COMMON_CHAT_FORMAT_LLAMA_3_X,
49
+ COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
50
+ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
51
+ COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING,
52
+ COMMON_CHAT_FORMAT_FIREFUNCTION_V2,
53
+ COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2,
54
+ COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
55
+ COMMON_CHAT_FORMAT_HERMES_2_PRO,
56
+ COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING,
57
+ COMMON_CHAT_FORMAT_COMMAND_R7B,
58
+ COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING,
59
+
60
+ COMMON_CHAT_FORMAT_COUNT, // Not a format: must stay last so that its value equals the number of formats above
61
+ };
62
+
63
+ struct common_chat_templates_inputs { // Input bundle passed to common_chat_templates_apply().
63
+ std::vector<common_chat_msg> messages;
64
+ std::string grammar;
65
+ std::string json_schema;
66
+ bool add_generation_prompt = true;
67
+ bool use_jinja = true;
68
+ // The parameters below are only supported when use_jinja is true
69
+ std::vector<common_chat_tool> tools;
70
+ common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
71
+ bool parallel_tool_calls = false;
72
+ bool extract_reasoning = true;
73
+ };
75
+
76
+ struct common_chat_params { // Result type returned by common_chat_templates_apply().
77
+ common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
78
+ std::string prompt;
79
+ std::string grammar;
80
+ bool grammar_lazy = false;
81
+ std::vector<common_grammar_trigger> grammar_triggers; // common_grammar_trigger is not declared in this header — presumably comes from common.h; confirm
82
+ std::vector<std::string> preserved_tokens;
83
+ std::vector<std::string> additional_stops;
84
+ };
85
+
86
+ // Checks whether the template supplied via "--chat-template" is supported. Returns true if it is valid.
87
+ bool common_chat_verify_template(const std::string & tmpl, bool use_jinja);
88
+
89
+ void common_chat_templates_free(struct common_chat_templates * tmpls); // Frees a templates object; invoked by common_chat_templates_deleter.
90
+
91
+ struct common_chat_templates_deleter { void operator()(common_chat_templates * tmpls) { common_chat_templates_free(tmpls); } }; // Deleter functor that forwards to common_chat_templates_free().
92
+
93
+ typedef std::unique_ptr<struct common_chat_templates, common_chat_templates_deleter> common_chat_templates_ptr; // Owning pointer using the deleter above. NOTE(review): <memory> is not included directly here — presumably pulled in via common.h; confirm.
94
+
95
+ common_chat_templates_ptr common_chat_templates_init( // Returns an owning pointer; the object is released through common_chat_templates_deleter.
96
+ const struct llama_model * model,
97
+ const std::string & chat_template_override, // NOTE(review): presumably an empty string means "use the model's own template" — confirm
98
+ const std::string & bos_token_override = "", // defaults to an empty string
99
+ const std::string & eos_token_override = ""); // defaults to an empty string
100
+
101
+ bool common_chat_templates_was_explicit(const struct common_chat_templates * tmpls); // NOTE(review): presumably true when a template override was supplied at init — confirm
102
+ const char * common_chat_templates_source(const struct common_chat_templates * tmpls, const char * variant = nullptr); // variant defaults to nullptr; lifetime of the returned pointer is not stated here — confirm before storing it
103
+
104
+
105
+ struct common_chat_params common_chat_templates_apply( // Takes the templates plus an inputs bundle and returns a common_chat_params by value.
106
+ const struct common_chat_templates * tmpls,
107
+ const struct common_chat_templates_inputs & inputs);
108
+
109
+ // Formats a single message, taking into account the position of that message in the chat history
110
+ std::string common_chat_format_single(
111
+ const struct common_chat_templates * tmpls,
112
+ const std::vector<common_chat_msg> & past_msg, // the chat history preceding new_msg
113
+ const common_chat_msg & new_msg, // the message to format
114
+ bool add_ass, // presumably "add assistant prompt" — TODO confirm against the definition
115
+ bool use_jinja);
116
+
117
+ // Returns an example of a formatted chat as a string
118
+ std::string common_chat_format_example(
119
+ const struct common_chat_templates * tmpls,
120
+ bool use_jinja);
121
+
122
+ std::string common_chat_format_name(common_chat_format format); // Returns a string name for the given format value.
123
+ common_chat_msg common_chat_parse( const std::string & input, common_chat_format format); // Builds a common_chat_msg from input text for the given format.
124
+
125
+ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice); // Converts a tool-choice string to the enum; handling of unrecognized strings is not visible here — confirm.
126
+
127
+ // Parses a JSON array of messages in OpenAI's chat completion API format.
128
+ // T can be std::string containing JSON or nlohmann::ordered_json
129
+ template <class T> std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const T & messages); // declared only; no definition appears in this header
130
+ template <class T> T common_chat_msgs_to_json_oaicompat(const std::vector<common_chat_msg> & msgs, bool concat_typed_text = false); // reverse direction: messages to T; concat_typed_text defaults to false
131
+
132
+ // Parses a JSON array of tools in OpenAI's chat completion tool call API format.
133
+ // T can be std::string containing JSON or nlohmann::ordered_json
134
+ template <class T> std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const T & tools); // declared only; no definition appears in this header
135
+ template <class T> T common_chat_tools_to_json_oaicompat(const std::vector<common_chat_tool> & tools); // reverse direction: tools to T