cui-llama.rn 1.4.3 → 1.4.6

This diff represents the changes between package versions as published to their respective public registries; it is provided for informational purposes only.
Files changed (134)
  1. package/README.md +93 -114
  2. package/android/src/main/CMakeLists.txt +5 -0
  3. package/android/src/main/java/com/rnllama/LlamaContext.java +91 -17
  4. package/android/src/main/java/com/rnllama/RNLlama.java +37 -4
  5. package/android/src/main/jni-utils.h +6 -0
  6. package/android/src/main/jni.cpp +289 -31
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  10. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  11. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  12. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  13. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  14. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  15. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +7 -2
  16. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +7 -2
  17. package/cpp/chat-template.hpp +529 -0
  18. package/cpp/chat.cpp +1779 -0
  19. package/cpp/chat.h +135 -0
  20. package/cpp/common.cpp +2064 -1873
  21. package/cpp/common.h +700 -699
  22. package/cpp/ggml-alloc.c +1039 -1042
  23. package/cpp/ggml-alloc.h +1 -1
  24. package/cpp/ggml-backend-impl.h +255 -255
  25. package/cpp/ggml-backend-reg.cpp +586 -582
  26. package/cpp/ggml-backend.cpp +2004 -2002
  27. package/cpp/ggml-backend.h +354 -354
  28. package/cpp/ggml-common.h +1851 -1853
  29. package/cpp/ggml-cpp.h +39 -39
  30. package/cpp/ggml-cpu-aarch64.cpp +4248 -4247
  31. package/cpp/ggml-cpu-aarch64.h +8 -8
  32. package/cpp/ggml-cpu-impl.h +531 -386
  33. package/cpp/ggml-cpu-quants.c +12527 -10920
  34. package/cpp/ggml-cpu-traits.cpp +36 -36
  35. package/cpp/ggml-cpu-traits.h +38 -38
  36. package/cpp/ggml-cpu.c +15766 -14391
  37. package/cpp/ggml-cpu.cpp +655 -635
  38. package/cpp/ggml-cpu.h +138 -135
  39. package/cpp/ggml-impl.h +567 -567
  40. package/cpp/ggml-metal-impl.h +235 -0
  41. package/cpp/ggml-metal.h +1 -1
  42. package/cpp/ggml-metal.m +5146 -4884
  43. package/cpp/ggml-opt.cpp +854 -854
  44. package/cpp/ggml-opt.h +216 -216
  45. package/cpp/ggml-quants.c +5238 -5238
  46. package/cpp/ggml-threading.h +14 -14
  47. package/cpp/ggml.c +6529 -6514
  48. package/cpp/ggml.h +2198 -2194
  49. package/cpp/gguf.cpp +1329 -1329
  50. package/cpp/gguf.h +202 -202
  51. package/cpp/json-schema-to-grammar.cpp +1024 -1045
  52. package/cpp/json-schema-to-grammar.h +21 -8
  53. package/cpp/json.hpp +24766 -24766
  54. package/cpp/llama-adapter.cpp +347 -347
  55. package/cpp/llama-adapter.h +74 -74
  56. package/cpp/llama-arch.cpp +1513 -1487
  57. package/cpp/llama-arch.h +403 -400
  58. package/cpp/llama-batch.cpp +368 -368
  59. package/cpp/llama-batch.h +88 -88
  60. package/cpp/llama-chat.cpp +588 -578
  61. package/cpp/llama-chat.h +53 -52
  62. package/cpp/llama-context.cpp +1775 -1775
  63. package/cpp/llama-context.h +128 -128
  64. package/cpp/llama-cparams.cpp +1 -1
  65. package/cpp/llama-cparams.h +37 -37
  66. package/cpp/llama-cpp.h +30 -30
  67. package/cpp/llama-grammar.cpp +1219 -1139
  68. package/cpp/llama-grammar.h +173 -143
  69. package/cpp/llama-hparams.cpp +71 -71
  70. package/cpp/llama-hparams.h +139 -139
  71. package/cpp/llama-impl.cpp +167 -167
  72. package/cpp/llama-impl.h +61 -61
  73. package/cpp/llama-kv-cache.cpp +718 -718
  74. package/cpp/llama-kv-cache.h +219 -218
  75. package/cpp/llama-mmap.cpp +600 -590
  76. package/cpp/llama-mmap.h +68 -67
  77. package/cpp/llama-model-loader.cpp +1124 -1124
  78. package/cpp/llama-model-loader.h +167 -167
  79. package/cpp/llama-model.cpp +4087 -3997
  80. package/cpp/llama-model.h +370 -370
  81. package/cpp/llama-sampling.cpp +2558 -2408
  82. package/cpp/llama-sampling.h +32 -32
  83. package/cpp/llama-vocab.cpp +3264 -3247
  84. package/cpp/llama-vocab.h +125 -125
  85. package/cpp/llama.cpp +10284 -10077
  86. package/cpp/llama.h +1354 -1323
  87. package/cpp/log.cpp +393 -401
  88. package/cpp/log.h +132 -121
  89. package/cpp/minja/chat-template.hpp +529 -0
  90. package/cpp/minja/minja.hpp +2915 -0
  91. package/cpp/minja.hpp +2915 -0
  92. package/cpp/rn-llama.cpp +66 -6
  93. package/cpp/rn-llama.h +26 -1
  94. package/cpp/sampling.cpp +570 -505
  95. package/cpp/sampling.h +3 -0
  96. package/cpp/sgemm.cpp +2598 -2597
  97. package/cpp/sgemm.h +14 -14
  98. package/cpp/speculative.cpp +278 -277
  99. package/cpp/speculative.h +28 -28
  100. package/cpp/unicode.cpp +9 -2
  101. package/ios/CMakeLists.txt +6 -0
  102. package/ios/RNLlama.h +0 -8
  103. package/ios/RNLlama.mm +27 -3
  104. package/ios/RNLlamaContext.h +10 -1
  105. package/ios/RNLlamaContext.mm +269 -57
  106. package/jest/mock.js +21 -2
  107. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  108. package/lib/commonjs/grammar.js +3 -0
  109. package/lib/commonjs/grammar.js.map +1 -1
  110. package/lib/commonjs/index.js +87 -13
  111. package/lib/commonjs/index.js.map +1 -1
  112. package/lib/module/NativeRNLlama.js.map +1 -1
  113. package/lib/module/grammar.js +3 -0
  114. package/lib/module/grammar.js.map +1 -1
  115. package/lib/module/index.js +86 -13
  116. package/lib/module/index.js.map +1 -1
  117. package/lib/typescript/NativeRNLlama.d.ts +107 -2
  118. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  119. package/lib/typescript/grammar.d.ts.map +1 -1
  120. package/lib/typescript/index.d.ts +32 -7
  121. package/lib/typescript/index.d.ts.map +1 -1
  122. package/llama-rn.podspec +1 -1
  123. package/package.json +3 -2
  124. package/src/NativeRNLlama.ts +115 -3
  125. package/src/grammar.ts +3 -0
  126. package/src/index.ts +138 -21
  127. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCCompiler.cmake +0 -81
  128. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeSystem.cmake +0 -15
  129. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.c +0 -904
  130. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.o +0 -0
  131. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.cpp +0 -919
  132. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.o +0 -0
  133. package/android/src/main/build-arm64/CMakeFiles/CMakeConfigureLog.yaml +0 -55
  134. package/cpp/rn-llama.hpp +0 -913
package/README.md CHANGED
@@ -76,7 +76,7 @@ const context = await initLlama({
   model: modelPath,
   use_mlock: true,
   n_ctx: 2048,
-  n_gpu_layers: 1, // > 0: enable Metal on iOS
+  n_gpu_layers: 99, // number of layers to store in VRAM (Currently only for iOS)
   // embedding: true, // use embedding
 })
 
@@ -141,148 +141,127 @@ Please visit the [Documentation](docs/API) for more details.
 
 You can also visit the [example](example) to see how to use it.
 
-## Grammar Sampling
+## Tool Calling
 
-GBNF (GGML BNF) is a format for defining [formal grammars](https://en.wikipedia.org/wiki/Formal_grammar) to constrain model outputs in `llama.cpp`. For example, you can use it to force the model to generate valid JSON, or speak only in emojis.
+`llama.rn` has universal tool call support, using [minja](https://github.com/google/minja) (as the Jinja template parser) and [chat.cpp](https://github.com/ggerganov/llama.cpp/blob/master/common/chat.cpp) from llama.cpp.
 
-You can see [GBNF Guide](https://github.com/ggerganov/llama.cpp/tree/master/grammars) for more details.
-
-`llama.rn` provided a built-in function to convert JSON Schema to GBNF:
+Example:
 
 ```js
-import { initLlama, convertJsonSchemaToGrammar } from 'llama.rn'
-
-const schema = {
-  /* JSON Schema, see below */
-}
+import { initLlama } from 'llama.rn'
 
 const context = await initLlama({
-  model: 'file://<path to gguf model>',
-  use_mlock: true,
-  n_ctx: 2048,
-  n_gpu_layers: 1, // > 0: enable Metal on iOS
-  // embedding: true, // use embedding
-  grammar: convertJsonSchemaToGrammar({
-    schema,
-    propOrder: { function: 0, arguments: 1 },
-  }),
-})
-
-const { text } = await context.completion({
-  prompt: 'Schedule a birthday party on Aug 14th 2023 at 8pm.',
+  // ...params
 })
-console.log('Result:', text)
-// Example output:
-// {"function": "create_event","arguments":{"date": "Aug 14th 2023", "time": "8pm", "title": "Birthday Party"}}
-```
 
-<details>
-<summary>JSON Schema example (Define function get_current_weather / create_event / image_search)</summary>
-
-```json5
-{
-  oneOf: [
+const { text, tool_calls } = await context.completion({
+  // ...params
+  jinja: true, // Enable Jinja template parser
+  tool_choice: 'auto',
+  tools: [
     {
-      type: 'object',
-      name: 'get_current_weather',
-      description: 'Get the current weather in a given location',
-      properties: {
-        function: {
-          const: 'get_current_weather',
-        },
-        arguments: {
+      type: 'function',
+      function: {
+        name: 'ipython',
+        description:
+          'Runs code in an ipython interpreter and returns the result of the execution after 60 seconds.',
+        parameters: {
           type: 'object',
           properties: {
-            location: {
+            code: {
               type: 'string',
-              description: 'The city and state, e.g. San Francisco, CA',
-            },
-            unit: {
-              type: 'string',
-              enum: ['celsius', 'fahrenheit'],
+              description: 'The code to run in the ipython interpreter.',
             },
           },
-          required: ['location'],
+          required: ['code'],
         },
       },
     },
+  ],
+  messages: [
     {
-      type: 'object',
-      name: 'create_event',
-      description: 'Create a calendar event',
-      properties: {
-        function: {
-          const: 'create_event',
-        },
-        arguments: {
-          type: 'object',
-          properties: {
-            title: {
-              type: 'string',
-              description: 'The title of the event',
-            },
-            date: {
-              type: 'string',
-              description: 'The date of the event',
-            },
-            time: {
-              type: 'string',
-              description: 'The time of the event',
-            },
-          },
-          required: ['title', 'date', 'time'],
-        },
-      },
+      role: 'system',
+      content: 'You are a helpful assistant that can answer questions and help with tasks.',
     },
     {
-      type: 'object',
-      name: 'image_search',
-      description: 'Search for an image',
-      properties: {
-        function: {
-          const: 'image_search',
-        },
-        arguments: {
-          type: 'object',
-          properties: {
-            query: {
-              type: 'string',
-              description: 'The search query',
-            },
-          },
-          required: ['query'],
-        },
-      },
+      role: 'user',
+      content: 'Test',
     },
   ],
-}
+})
+console.log('Result:', text)
+// If tool_calls is not empty, the model has called a tool
+if (tool_calls) console.log('Tool Calls:', tool_calls)
 ```
 
-</details>
+You can check [chat.cpp](https://github.com/ggerganov/llama.cpp/blob/6eecde3cc8fda44da7794042e3668de4af3c32c6/common/chat.cpp#L7-L23) for the models that have native tool calling support; other models fall back to the `GENERIC` tool call type.
 
-<details>
-<summary>Converted GBNF looks like</summary>
+The generic tool call always produces a JSON object as output; when the model decides not to call a tool, the output looks like `{"response": "..."}`.
 
+## Grammar Sampling
+
+GBNF (GGML BNF) is a format for defining [formal grammars](https://en.wikipedia.org/wiki/Formal_grammar) to constrain model outputs in `llama.cpp`. For example, you can use it to force the model to generate valid JSON, or speak only in emojis.
+
+You can see the [GBNF Guide](https://github.com/ggerganov/llama.cpp/tree/master/grammars) for more details.
+
+`llama.rn` provides a built-in function to convert JSON Schema to GBNF.
+
+Example GBNF grammar:
 ```bnf
-space ::= " "?
-0-function ::= "\"get_current_weather\""
-string ::= "\"" (
-  [^"\\] |
-  "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
-)* "\"" space
-0-arguments-unit ::= "\"celsius\"" | "\"fahrenheit\""
-0-arguments ::= "{" space "\"location\"" space ":" space string "," space "\"unit\"" space ":" space 0-arguments-unit "}" space
-0 ::= "{" space "\"function\"" space ":" space 0-function "," space "\"arguments\"" space ":" space 0-arguments "}" space
-1-function ::= "\"create_event\""
-1-arguments ::= "{" space "\"date\"" space ":" space string "," space "\"time\"" space ":" space string "," space "\"title\"" space ":" space string "}" space
-1 ::= "{" space "\"function\"" space ":" space 1-function "," space "\"arguments\"" space ":" space 1-arguments "}" space
-2-function ::= "\"image_search\""
-2-arguments ::= "{" space "\"query\"" space ":" space string "}" space
-2 ::= "{" space "\"function\"" space ":" space 2-function "," space "\"arguments\"" space ":" space 2-arguments "}" space
-root ::= 0 | 1 | 2
+root ::= object
+value ::= object | array | string | number | ("true" | "false" | "null") ws
+
+object ::=
+  "{" ws (
+    string ":" ws value
+    ("," ws string ":" ws value)*
+  )? "}" ws
+
+array ::=
+  "[" ws (
+    value
+    ("," ws value)*
+  )? "]" ws
+
+string ::=
+  "\"" (
+    [^"\\\x7F\x00-\x1F] |
+    "\\" (["\\bfnrt] | "u" [0-9a-fA-F]{4}) # escapes
+  )* "\"" ws
+
+number ::= ("-"? ([0-9] | [1-9] [0-9]{0,15})) ("." [0-9]+)? ([eE] [-+]? [0-9] [1-9]{0,15})? ws
+
+# Optional space: by convention, applied in this grammar after literal chars when allowed
+ws ::= | " " | "\n" [ \t]{0,20}
+```
+
+```js
+import { initLlama } from 'llama.rn'
+
+const gbnf = '...'
+
+const context = await initLlama({
+  // ...params
+  grammar: gbnf,
+})
+
+const { text } = await context.completion({
+  // ...params
+  messages: [
+    {
+      role: 'system',
+      content: 'You are a helpful assistant that can answer questions and help with tasks.',
+    },
+    {
+      role: 'user',
+      content: 'Test',
+    },
+  ],
+})
+console.log('Result:', text)
 ```
 
-</details>
+Also, this is how `json_schema` works in `response_format` during completion: it converts the JSON schema to a GBNF grammar.
 
 ## Mock `llama.rn`
 
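Note: per the README change above, a `json_schema` given in `response_format` is converted to a GBNF grammar at completion time. A minimal sketch of that path, assuming an OpenAI-style `response_format` shape (the authoritative types live in `package/src/NativeRNLlama.ts` and `package/src/index.ts` in this release):

```js
// Sketch only: the nested `response_format` field names below are assumptions
// modeled on the OpenAI convention, not confirmed by this diff.
import { initLlama } from 'llama.rn'

const context = await initLlama({ model: 'file://<path to gguf model>' })

const { text } = await context.completion({
  messages: [{ role: 'user', content: 'Describe a user as JSON.' }],
  response_format: {
    type: 'json_schema',
    json_schema: {
      schema: {
        type: 'object',
        properties: { name: { type: 'string' }, age: { type: 'number' } },
        required: ['name', 'age'],
      },
    },
  },
})
console.log('Structured result:', JSON.parse(text))
```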
package/android/src/main/CMakeLists.txt CHANGED
@@ -51,6 +51,11 @@ set(
     ${RNLLAMA_LIB_DIR}/unicode.cpp
     ${RNLLAMA_LIB_DIR}/sgemm.cpp
     ${RNLLAMA_LIB_DIR}/common.cpp
+    ${RNLLAMA_LIB_DIR}/chat.cpp
+    ${RNLLAMA_LIB_DIR}/minja/chat-template.hpp
+    ${RNLLAMA_LIB_DIR}/json-schema-to-grammar.cpp
+    ${RNLLAMA_LIB_DIR}/minja/minja.hpp
+    ${RNLLAMA_LIB_DIR}/json.hpp
     ${RNLLAMA_LIB_DIR}/rn-llama.cpp
     ${CMAKE_SOURCE_DIR}/jni-utils.h
     ${CMAKE_SOURCE_DIR}/jni.cpp
package/android/src/main/java/com/rnllama/LlamaContext.java CHANGED
@@ -28,6 +28,32 @@ public class LlamaContext {
 
   private static String loadedLibrary = "";
 
+  private static class NativeLogCallback {
+    DeviceEventManagerModule.RCTDeviceEventEmitter eventEmitter;
+
+    public NativeLogCallback(ReactApplicationContext reactContext) {
+      this.eventEmitter = reactContext.getJSModule(DeviceEventManagerModule.RCTDeviceEventEmitter.class);
+    }
+
+    void emitNativeLog(String level, String text) {
+      WritableMap event = Arguments.createMap();
+      event.putString("level", level);
+      event.putString("text", text);
+      eventEmitter.emit("@RNLlama_onNativeLog", event);
+    }
+  }
+
+  static void toggleNativeLog(ReactApplicationContext reactContext, boolean enabled) {
+    if (LlamaContext.isArchNotSupported()) {
+      throw new IllegalStateException("Only 64-bit architectures are supported");
+    }
+    if (enabled) {
+      setupLog(new NativeLogCallback(reactContext));
+    } else {
+      unsetLog();
+    }
+  }
+
   private int id;
   private ReactApplicationContext reactContext;
   private long context;
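Note: the new `NativeLogCallback` forwards native log lines to JS as `@RNLlama_onNativeLog` events with `level` and `text` fields. A sketch of the JS side; routing `toggleNativeLog` through `NativeModules.RNLlama` is an assumption based on the method added to `RNLlama.java` further down:

```js
// Sketch: subscribe to the native log stream added in this hunk.
import { NativeEventEmitter, NativeModules } from 'react-native'

const emitter = new NativeEventEmitter(NativeModules.RNLlama)
const sub = emitter.addListener('@RNLlama_onNativeLog', ({ level, text }) => {
  console.log(`[rnllama ${level}] ${text}`) // "level" and "text" come from emitNativeLog
})

await NativeModules.RNLlama.toggleNativeLog(true) // start forwarding native logs
// ...later, stop listening and disable forwarding:
sub.remove()
await NativeModules.RNLlama.toggleNativeLog(false)
```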
@@ -73,7 +99,7 @@ public class LlamaContext {
   }
 
   public LlamaContext(int id, ReactApplicationContext reactContext, ReadableMap params) {
-    if (LlamaContext.isArm64V8a() == false && LlamaContext.isX86_64() == false) {
+    if (LlamaContext.isArchNotSupported()) {
       throw new IllegalStateException("Only 64-bit architectures are supported");
     }
     if (!params.hasKey("model")) {
@@ -95,13 +121,17 @@ public class LlamaContext {
         Log.e(NAME, "Failed to convert to FD!");
       }
     }
-    logToAndroid();
+
     // Check if file has GGUF magic numbers
     this.id = id;
    eventEmitter = reactContext.getJSModule(DeviceEventManagerModule.RCTDeviceEventEmitter.class);
     this.context = initContext(
       // String model,
       modelName,
+      // String chat_template,
+      params.hasKey("chat_template") ? params.getString("chat_template") : "",
+      // String reasoning_format,
+      params.hasKey("reasoning_format") ? params.getString("reasoning_format") : "none",
       // boolean embedding,
       params.hasKey("embedding") ? params.getBoolean("embedding") : false,
       // int embd_normalize,
@@ -166,12 +196,24 @@ public class LlamaContext {
     return loadedLibrary;
   }
 
-  public String getFormattedChat(ReadableArray messages, String chatTemplate) {
-    ReadableMap[] msgs = new ReadableMap[messages.size()];
-    for (int i = 0; i < messages.size(); i++) {
-      msgs[i] = messages.getMap(i);
-    }
-    return getFormattedChat(this.context, msgs, chatTemplate == null ? "" : chatTemplate);
+  public WritableMap getFormattedChatWithJinja(String messages, String chatTemplate, ReadableMap params) {
+    String jsonSchema = params.hasKey("json_schema") ? params.getString("json_schema") : "";
+    String tools = params.hasKey("tools") ? params.getString("tools") : "";
+    Boolean parallelToolCalls = params.hasKey("parallel_tool_calls") ? params.getBoolean("parallel_tool_calls") : false;
+    String toolChoice = params.hasKey("tool_choice") ? params.getString("tool_choice") : "";
+    return getFormattedChatWithJinja(
+      this.context,
+      messages,
+      chatTemplate == null ? "" : chatTemplate,
+      jsonSchema,
+      tools,
+      parallelToolCalls,
+      toolChoice
+    );
+  }
+
+  public String getFormattedChat(String messages, String chatTemplate) {
+    return getFormattedChat(this.context, messages, chatTemplate == null ? "" : chatTemplate);
   }
 
   private void emitLoadProgress(int progress) {
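Note: unlike the legacy path, which returns a rendered prompt string, the Jinja path returns a map. A sketch of driving it end to end from JS; the keys read off `formatted` (`prompt`, `chat_format`, `grammar`, ...) are assumptions inferred from the `doCompletion` parameters added in the next hunks, and `contextId`/`context` stand in for a previously initialized context:

```js
// Sketch: format with the model's Jinja chat template, then complete.
import { NativeModules } from 'react-native'

const messages = [{ role: 'user', content: 'What is 2+2?' }]
const tools = [] // OpenAI-style function tools, if any

const formatted = await NativeModules.RNLlama.getFormattedChat(
  contextId, // hypothetical: id of an initialized context
  JSON.stringify(messages), // messages travel as a JSON string
  '', // empty template = use the model's built-in one
  { jinja: true, tools: JSON.stringify(tools), tool_choice: 'auto' },
)

await context.completion({
  prompt: formatted.prompt,
  chat_format: formatted.chat_format, // assumed result keys, see lead-in
  grammar: formatted.grammar,
  grammar_lazy: formatted.grammar_lazy,
  grammar_triggers: formatted.grammar_triggers,
  preserved_tokens: formatted.preserved_tokens,
})
```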
@@ -259,8 +301,18 @@ public class LlamaContext {
       this.context,
       // String prompt,
       params.getString("prompt"),
+      // int chat_format,
+      params.hasKey("chat_format") ? params.getInt("chat_format") : 0,
       // String grammar,
       params.hasKey("grammar") ? params.getString("grammar") : "",
+      // String json_schema,
+      params.hasKey("json_schema") ? params.getString("json_schema") : "",
+      // boolean grammar_lazy,
+      params.hasKey("grammar_lazy") ? params.getBoolean("grammar_lazy") : false,
+      // ReadableArray grammar_triggers,
+      params.hasKey("grammar_triggers") ? params.getArray("grammar_triggers") : null,
+      // ReadableArray preserved_tokens,
+      params.hasKey("preserved_tokens") ? params.getArray("preserved_tokens") : null,
       // float temperature,
       params.hasKey("temperature") ? (float) params.getDouble("temperature") : 0.7f,
       // int n_threads,
@@ -311,6 +363,8 @@ public class LlamaContext {
       params.hasKey("dry_allowed_length") ? params.getInt("dry_allowed_length") : 2,
       // int dry_penalty_last_n,
       params.hasKey("dry_penalty_last_n") ? params.getInt("dry_penalty_last_n") : -1,
+      // float top_n_sigma,
+      params.hasKey("top_n_sigma") ? (float) params.getDouble("top_n_sigma") : -1.0f,
       // String[] dry_sequence_breakers, when undef, we use the default definition from common.h
       params.hasKey("dry_sequence_breakers") ? params.getArray("dry_sequence_breakers").toArrayList().toArray(new String[0]) : new String[]{"\n", ":", "\"", "*"},
       // PartialCompletionCallback partial_completion_callback
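Note: `top_n_sigma` is a new sampler knob surfaced here; the native default of `-1.0f` disables it, and positive values restrict sampling to tokens whose logits lie within n standard deviations of the maximum. A sketch, assuming the JS completion params forward the name unchanged:

```js
// Sketch only: `top_n_sigma` is assumed to pass through completion params as-is.
const { text } = await context.completion({
  messages: [{ role: 'user', content: 'Write a haiku about autumn.' }],
  temperature: 0.8,
  top_n_sigma: 1.5, // keep tokens within 1.5 std-dev of the top logit; -1 disables
})
```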
@@ -431,15 +485,13 @@ public class LlamaContext {
       // Log.d(NAME, "Loading librnllama_v8_7.so with runtime feature detection");
       // System.loadLibrary("rnllama_v8_7");
     } else if (LlamaContext.isX86_64()) {
-      Log.d(NAME, "Loading librnllama_x86_64.so");
-      System.loadLibrary("rnllama_x86_64");
-      loadedLibrary = "rnllama_x86_64";
+      Log.d(NAME, "Loading librnllama_x86_64.so");
+      System.loadLibrary("rnllama_x86_64");
+      loadedLibrary = "rnllama_x86_64";
     } else {
-      Log.d(NAME, "Loading default librnllama.so");
-      System.loadLibrary("rnllama");
-      loadedLibrary = "rnllama";
+      Log.d(NAME, "ARM32 is not supported, skipping loading library");
     }
-  }
+  }
 
   public static boolean isArm64V8a() {
     return Build.SUPPORTED_ABIS[0].equals("arm64-v8a");
@@ -449,6 +501,10 @@ public class LlamaContext {
     return Build.SUPPORTED_ABIS[0].equals("x86_64");
   }
 
+  private static boolean isArchNotSupported() {
+    return isArm64V8a() == false && isX86_64() == false;
+  }
+
   public static String getCpuFeatures() {
     File file = new File("/proc/cpuinfo");
     StringBuilder stringBuilder = new StringBuilder();
@@ -481,6 +537,8 @@ public class LlamaContext {
   );
   protected static native long initContext(
     String model,
+    String chat_template,
+    String reasoning_format,
     boolean embedding,
     int embd_normalize,
     int n_ctx,
@@ -506,9 +564,18 @@ public class LlamaContext {
   protected static native WritableMap loadModelDetails(
     long contextPtr
   );
+  protected static native WritableMap getFormattedChatWithJinja(
+    long contextPtr,
+    String messages,
+    String chatTemplate,
+    String jsonSchema,
+    String tools,
+    boolean parallelToolCalls,
+    String toolChoice
+  );
   protected static native String getFormattedChat(
     long contextPtr,
-    ReadableMap[] messages,
+    String messages,
     String chatTemplate
   );
   protected static native WritableMap loadSession(
@@ -523,7 +590,12 @@ public class LlamaContext {
   protected static native WritableMap doCompletion(
     long context_ptr,
     String prompt,
+    int chat_format,
     String grammar,
+    String json_schema,
+    boolean grammar_lazy,
+    ReadableArray grammar_triggers,
+    ReadableArray preserved_tokens,
     float temperature,
     int n_threads,
     int n_predict,
@@ -549,6 +621,7 @@ public class LlamaContext {
     float dry_base,
     int dry_allowed_length,
     int dry_penalty_last_n,
+    float top_n_sigma,
     String[] dry_sequence_breakers,
     PartialCompletionCallback partial_completion_callback
   );
@@ -567,5 +640,6 @@ public class LlamaContext {
   protected static native void removeLoraAdapters(long contextPtr);
   protected static native WritableArray getLoadedLoraAdapters(long contextPtr);
   protected static native void freeContext(long contextPtr);
-  protected static native void logToAndroid();
+  protected static native void setupLog(NativeLogCallback logCallback);
+  protected static native void unsetLog();
 }
package/android/src/main/java/com/rnllama/RNLlama.java CHANGED
@@ -35,6 +35,32 @@ public class RNLlama implements LifecycleEventListener {
 
   private HashMap<Integer, LlamaContext> contexts = new HashMap<>();
 
+  public void toggleNativeLog(boolean enabled, Promise promise) {
+    new AsyncTask<Void, Void, Boolean>() {
+      private Exception exception;
+
+      @Override
+      protected Boolean doInBackground(Void... voids) {
+        try {
+          LlamaContext.toggleNativeLog(reactContext, enabled);
+          return true;
+        } catch (Exception e) {
+          exception = e;
+        }
+        return null;
+      }
+
+      @Override
+      protected void onPostExecute(Boolean result) {
+        if (exception != null) {
+          promise.reject(exception);
+          return;
+        }
+        promise.resolve(result);
+      }
+    }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+  }
+
   private int llamaContextLimit = -1;
 
   public void setContextLimit(double limit, Promise promise) {
@@ -116,18 +142,25 @@ public class RNLlama implements LifecycleEventListener {
     tasks.put(task, "initContext");
   }
 
-  public void getFormattedChat(double id, final ReadableArray messages, final String chatTemplate, Promise promise) {
+  public void getFormattedChat(double id, final String messages, final String chatTemplate, final ReadableMap params, Promise promise) {
     final int contextId = (int) id;
-    AsyncTask task = new AsyncTask<Void, Void, String>() {
+    AsyncTask task = new AsyncTask<Void, Void, Object>() {
       private Exception exception;
 
       @Override
-      protected String doInBackground(Void... voids) {
+      protected Object doInBackground(Void... voids) {
         try {
           LlamaContext context = contexts.get(contextId);
           if (context == null) {
             throw new Exception("Context not found");
           }
+          if (params.hasKey("jinja") && params.getBoolean("jinja")) {
+            ReadableMap result = context.getFormattedChatWithJinja(messages, chatTemplate, params);
+            if (result.hasKey("_error")) {
+              throw new Exception(result.getString("_error"));
+            }
+            return result;
+          }
           return context.getFormattedChat(messages, chatTemplate);
         } catch (Exception e) {
           exception = e;
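Note: with this change the `getFormattedChat` promise resolves to a string on the legacy path and to a map on the Jinja path, and Jinja template errors (the `_error` key) surface as rejections. A sketch of the caller-side branching this implies:

```js
// Sketch: handle both result shapes of the updated bridge method.
try {
  const result = await NativeModules.RNLlama.getFormattedChat(
    contextId, // hypothetical initialized context id
    JSON.stringify(messages),
    null,
    { jinja: useJinja },
  )
  const prompt = typeof result === 'string' ? result : result.prompt
  // ...pass `prompt` (and, on the jinja path, the grammar fields) to completion
} catch (e) {
  console.warn('Chat template failed to render:', e)
}
```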
@@ -136,7 +169,7 @@ public class RNLlama implements LifecycleEventListener {
       }
 
       @Override
-      protected void onPostExecute(String result) {
+      protected void onPostExecute(Object result) {
         if (exception != null) {
           promise.reject(exception);
           return;
package/android/src/main/jni-utils.h CHANGED
@@ -16,6 +16,12 @@ jobject getMap(JNIEnv *env, jobject readableArray, int index) {
   return env->CallObjectMethod(readableArray, getMapMethod, index);
 }
 
+jstring getString(JNIEnv *env, jobject readableArray, int index) {
+  jclass arrayClass = env->GetObjectClass(readableArray);
+  jmethodID getStringMethod = env->GetMethodID(arrayClass, "getString", "(I)Ljava/lang/String;");
+  return (jstring) env->CallObjectMethod(readableArray, getStringMethod, index);
+}
+
 // Other methods not used yet
 
 }