cui-llama.rn 1.4.3 → 1.4.6
- package/README.md +93 -114
- package/android/src/main/CMakeLists.txt +5 -0
- package/android/src/main/java/com/rnllama/LlamaContext.java +91 -17
- package/android/src/main/java/com/rnllama/RNLlama.java +37 -4
- package/android/src/main/jni-utils.h +6 -0
- package/android/src/main/jni.cpp +289 -31
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +7 -2
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +7 -2
- package/cpp/chat-template.hpp +529 -0
- package/cpp/chat.cpp +1779 -0
- package/cpp/chat.h +135 -0
- package/cpp/common.cpp +2064 -1873
- package/cpp/common.h +700 -699
- package/cpp/ggml-alloc.c +1039 -1042
- package/cpp/ggml-alloc.h +1 -1
- package/cpp/ggml-backend-impl.h +255 -255
- package/cpp/ggml-backend-reg.cpp +586 -582
- package/cpp/ggml-backend.cpp +2004 -2002
- package/cpp/ggml-backend.h +354 -354
- package/cpp/ggml-common.h +1851 -1853
- package/cpp/ggml-cpp.h +39 -39
- package/cpp/ggml-cpu-aarch64.cpp +4248 -4247
- package/cpp/ggml-cpu-aarch64.h +8 -8
- package/cpp/ggml-cpu-impl.h +531 -386
- package/cpp/ggml-cpu-quants.c +12527 -10920
- package/cpp/ggml-cpu-traits.cpp +36 -36
- package/cpp/ggml-cpu-traits.h +38 -38
- package/cpp/ggml-cpu.c +15766 -14391
- package/cpp/ggml-cpu.cpp +655 -635
- package/cpp/ggml-cpu.h +138 -135
- package/cpp/ggml-impl.h +567 -567
- package/cpp/ggml-metal-impl.h +235 -0
- package/cpp/ggml-metal.h +1 -1
- package/cpp/ggml-metal.m +5146 -4884
- package/cpp/ggml-opt.cpp +854 -854
- package/cpp/ggml-opt.h +216 -216
- package/cpp/ggml-quants.c +5238 -5238
- package/cpp/ggml-threading.h +14 -14
- package/cpp/ggml.c +6529 -6514
- package/cpp/ggml.h +2198 -2194
- package/cpp/gguf.cpp +1329 -1329
- package/cpp/gguf.h +202 -202
- package/cpp/json-schema-to-grammar.cpp +1024 -1045
- package/cpp/json-schema-to-grammar.h +21 -8
- package/cpp/json.hpp +24766 -24766
- package/cpp/llama-adapter.cpp +347 -347
- package/cpp/llama-adapter.h +74 -74
- package/cpp/llama-arch.cpp +1513 -1487
- package/cpp/llama-arch.h +403 -400
- package/cpp/llama-batch.cpp +368 -368
- package/cpp/llama-batch.h +88 -88
- package/cpp/llama-chat.cpp +588 -578
- package/cpp/llama-chat.h +53 -52
- package/cpp/llama-context.cpp +1775 -1775
- package/cpp/llama-context.h +128 -128
- package/cpp/llama-cparams.cpp +1 -1
- package/cpp/llama-cparams.h +37 -37
- package/cpp/llama-cpp.h +30 -30
- package/cpp/llama-grammar.cpp +1219 -1139
- package/cpp/llama-grammar.h +173 -143
- package/cpp/llama-hparams.cpp +71 -71
- package/cpp/llama-hparams.h +139 -139
- package/cpp/llama-impl.cpp +167 -167
- package/cpp/llama-impl.h +61 -61
- package/cpp/llama-kv-cache.cpp +718 -718
- package/cpp/llama-kv-cache.h +219 -218
- package/cpp/llama-mmap.cpp +600 -590
- package/cpp/llama-mmap.h +68 -67
- package/cpp/llama-model-loader.cpp +1124 -1124
- package/cpp/llama-model-loader.h +167 -167
- package/cpp/llama-model.cpp +4087 -3997
- package/cpp/llama-model.h +370 -370
- package/cpp/llama-sampling.cpp +2558 -2408
- package/cpp/llama-sampling.h +32 -32
- package/cpp/llama-vocab.cpp +3264 -3247
- package/cpp/llama-vocab.h +125 -125
- package/cpp/llama.cpp +10284 -10077
- package/cpp/llama.h +1354 -1323
- package/cpp/log.cpp +393 -401
- package/cpp/log.h +132 -121
- package/cpp/minja/chat-template.hpp +529 -0
- package/cpp/minja/minja.hpp +2915 -0
- package/cpp/minja.hpp +2915 -0
- package/cpp/rn-llama.cpp +66 -6
- package/cpp/rn-llama.h +26 -1
- package/cpp/sampling.cpp +570 -505
- package/cpp/sampling.h +3 -0
- package/cpp/sgemm.cpp +2598 -2597
- package/cpp/sgemm.h +14 -14
- package/cpp/speculative.cpp +278 -277
- package/cpp/speculative.h +28 -28
- package/cpp/unicode.cpp +9 -2
- package/ios/CMakeLists.txt +6 -0
- package/ios/RNLlama.h +0 -8
- package/ios/RNLlama.mm +27 -3
- package/ios/RNLlamaContext.h +10 -1
- package/ios/RNLlamaContext.mm +269 -57
- package/jest/mock.js +21 -2
- package/lib/commonjs/NativeRNLlama.js.map +1 -1
- package/lib/commonjs/grammar.js +3 -0
- package/lib/commonjs/grammar.js.map +1 -1
- package/lib/commonjs/index.js +87 -13
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/NativeRNLlama.js.map +1 -1
- package/lib/module/grammar.js +3 -0
- package/lib/module/grammar.js.map +1 -1
- package/lib/module/index.js +86 -13
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeRNLlama.d.ts +107 -2
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/grammar.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +32 -7
- package/lib/typescript/index.d.ts.map +1 -1
- package/llama-rn.podspec +1 -1
- package/package.json +3 -2
- package/src/NativeRNLlama.ts +115 -3
- package/src/grammar.ts +3 -0
- package/src/index.ts +138 -21
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCCompiler.cmake +0 -81
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeSystem.cmake +0 -15
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.c +0 -904
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.cpp +0 -919
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/CMakeConfigureLog.yaml +0 -55
- package/cpp/rn-llama.hpp +0 -913
package/README.md
CHANGED
@@ -76,7 +76,7 @@ const context = await initLlama({
   model: modelPath,
   use_mlock: true,
   n_ctx: 2048,
-  n_gpu_layers:
+  n_gpu_layers: 99, // number of layers to store in VRAM (Currently only for iOS)
   // embedding: true, // use embedding
 })
 
@@ -141,148 +141,127 @@ Please visit the [Documentation](docs/API) for more details.
 
 You can also visit the [example](example) to see how to use it.
 
-##
+## Tool Calling
 
-
+`llama.rn` has universal tool call support by using [minja](https://github.com/google/minja) (as Jinja template parser) and [chat.cpp](https://github.com/ggerganov/llama.cpp/blob/master/common/chat.cpp) in llama.cpp.
 
-
-
-`llama.rn` provided a built-in function to convert JSON Schema to GBNF:
+Example:
 
 ```js
-import { initLlama
-
-const schema = {
-  /* JSON Schema, see below */
-}
+import { initLlama } from 'llama.rn'
 
 const context = await initLlama({
-
-  use_mlock: true,
-  n_ctx: 2048,
-  n_gpu_layers: 1, // > 0: enable Metal on iOS
-  // embedding: true, // use embedding
-  grammar: convertJsonSchemaToGrammar({
-    schema,
-    propOrder: { function: 0, arguments: 1 },
-  }),
-})
-
-const { text } = await context.completion({
-  prompt: 'Schedule a birthday party on Aug 14th 2023 at 8pm.',
+  // ...params
 })
-console.log('Result:', text)
-// Example output:
-// {"function": "create_event","arguments":{"date": "Aug 14th 2023", "time": "8pm", "title": "Birthday Party"}}
-```
 
-
-
-
-
-
-  oneOf: [
+const { text, tool_calls } = await context.completion({
+  // ...params
+  jinja: true, // Enable Jinja template parser
+  tool_choice: 'auto',
+  tools: [
     {
-      type: '
-
-
-
-
-
-      },
-      arguments: {
+      type: 'function',
+      function: {
+        name: 'ipython',
+        description:
+          'Runs code in an ipython interpreter and returns the result of the execution after 60 seconds.',
+        parameters: {
           type: 'object',
           properties: {
-
+            code: {
               type: 'string',
-              description: 'The
-            },
-            unit: {
-              type: 'string',
-              enum: ['celsius', 'fahrenheit'],
+              description: 'The code to run in the ipython interpreter.',
             },
           },
-          required: ['
+          required: ['code'],
         },
       },
     },
+  ],
+  messages: [
     {
-
-
-      description: 'Create a calendar event',
-      properties: {
-        function: {
-          const: 'create_event',
-        },
-        arguments: {
-          type: 'object',
-          properties: {
-            title: {
-              type: 'string',
-              description: 'The title of the event',
-            },
-            date: {
-              type: 'string',
-              description: 'The date of the event',
-            },
-            time: {
-              type: 'string',
-              description: 'The time of the event',
-            },
-          },
-          required: ['title', 'date', 'time'],
-        },
-      },
+      role: 'system',
+      content: 'You are a helpful assistant that can answer questions and help with tasks.',
     },
     {
-
-
-      description: 'Search for an image',
-      properties: {
-        function: {
-          const: 'image_search',
-        },
-        arguments: {
-          type: 'object',
-          properties: {
-            query: {
-              type: 'string',
-              description: 'The search query',
-            },
-          },
-          required: ['query'],
-        },
-      },
+      role: 'user',
+      content: 'Test',
     },
   ],
-}
+})
+console.log('Result:', text)
+// If tool_calls is not empty, it means the model has called the tool
+if (tool_calls) console.log('Tool Calls:', tool_calls)
 ```
 
-
+You can check [chat.cpp](https://github.com/ggerganov/llama.cpp/blob/6eecde3cc8fda44da7794042e3668de4af3c32c6/common/chat.cpp#L7-L23) to see which models have native tool calling support; other models fall back to the `GENERIC` tool call type.
 
-
-<summary>Converted GBNF looks like</summary>
+The generic tool call always produces a JSON object as output; when the model decides not to call a tool, the output looks like `{"response": "..."}`.
 
+## Grammar Sampling
+
+GBNF (GGML BNF) is a format for defining [formal grammars](https://en.wikipedia.org/wiki/Formal_grammar) to constrain model outputs in `llama.cpp`. For example, you can use it to force the model to generate valid JSON, or speak only in emojis.
+
+You can see the [GBNF Guide](https://github.com/ggerganov/llama.cpp/tree/master/grammars) for more details.
+
+`llama.rn` provides a built-in function to convert a JSON Schema to GBNF:
+
+Example GBNF grammar:
 ```bnf
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+root ::= object
+value ::= object | array | string | number | ("true" | "false" | "null") ws
+
+object ::=
+  "{" ws (
+    string ":" ws value
+    ("," ws string ":" ws value)*
+  )? "}" ws
+
+array ::=
+  "[" ws (
+    value
+    ("," ws value)*
+  )? "]" ws
+
+string ::=
+  "\"" (
+    [^"\\\x7F\x00-\x1F] |
+    "\\" (["\\bfnrt] | "u" [0-9a-fA-F]{4}) # escapes
+  )* "\"" ws
+
+number ::= ("-"? ([0-9] | [1-9] [0-9]{0,15})) ("." [0-9]+)? ([eE] [-+]? [0-9] [1-9]{0,15})? ws
+
+# Optional space: by convention, applied in this grammar after literal chars when allowed
+ws ::= | " " | "\n" [ \t]{0,20}
+```
+
+```js
+import { initLlama } from 'llama.rn'
+
+const gbnf = '...'
+
+const context = await initLlama({
+  // ...params
+  grammar: gbnf,
+})
+
+const { text } = await context.completion({
+  // ...params
+  messages: [
+    {
+      role: 'system',
+      content: 'You are a helpful assistant that can answer questions and help with tasks.',
+    },
+    {
+      role: 'user',
+      content: 'Test',
+    },
+  ],
+})
+console.log('Result:', text)
 ```
 
-
+Also, this is how `json_schema` works in `response_format` during completion: it converts the JSON schema to a GBNF grammar.
 
 ## Mock `llama.rn`
 
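Worth noting alongside the new Tool Calling section: the README example stops after logging `tool_calls`. Below is a minimal sketch of completing the round trip, under loudly stated assumptions: that `messages` and `tools` hold the arrays from the example above, that `tool_calls` entries follow the OpenAI-style `{ function: { name, arguments } }` shape, that the model's chat template accepts `tool` role messages (this varies by model), and that `runIPython` is a hypothetical tool runner of your own.

```js
// Hypothetical continuation of the tool-calling example above.
// `runIPython`, the 'tool' role, and the tool_calls shape are assumptions.
if (tool_calls && tool_calls.length > 0) {
  const call = tool_calls[0]
  const args = JSON.parse(call.function.arguments) // e.g. { code: '...' }
  const toolOutput = await runIPython(args.code)   // your own tool executor

  // Feed the tool result back so the model can produce a final answer.
  const followUp = await context.completion({
    jinja: true,
    tools,
    messages: [
      ...messages,
      { role: 'assistant', content: '', tool_calls },
      { role: 'tool', content: JSON.stringify(toolOutput) },
    ],
  })
  console.log('Final answer:', followUp.text)
}
```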
package/android/src/main/CMakeLists.txt
CHANGED
@@ -51,6 +51,11 @@ set(
     ${RNLLAMA_LIB_DIR}/unicode.cpp
     ${RNLLAMA_LIB_DIR}/sgemm.cpp
     ${RNLLAMA_LIB_DIR}/common.cpp
+    ${RNLLAMA_LIB_DIR}/chat.cpp
+    ${RNLLAMA_LIB_DIR}/minja/chat-template.hpp
+    ${RNLLAMA_LIB_DIR}/json-schema-to-grammar.cpp
+    ${RNLLAMA_LIB_DIR}/minja/minja.hpp
+    ${RNLLAMA_LIB_DIR}/json.hpp
     ${RNLLAMA_LIB_DIR}/rn-llama.cpp
     ${CMAKE_SOURCE_DIR}/jni-utils.h
     ${CMAKE_SOURCE_DIR}/jni.cpp
package/android/src/main/java/com/rnllama/LlamaContext.java
CHANGED
@@ -28,6 +28,32 @@ public class LlamaContext {
 
   private static String loadedLibrary = "";
 
+  private static class NativeLogCallback {
+    DeviceEventManagerModule.RCTDeviceEventEmitter eventEmitter;
+
+    public NativeLogCallback(ReactApplicationContext reactContext) {
+      this.eventEmitter = reactContext.getJSModule(DeviceEventManagerModule.RCTDeviceEventEmitter.class);
+    }
+
+    void emitNativeLog(String level, String text) {
+      WritableMap event = Arguments.createMap();
+      event.putString("level", level);
+      event.putString("text", text);
+      eventEmitter.emit("@RNLlama_onNativeLog", event);
+    }
+  }
+
+  static void toggleNativeLog(ReactApplicationContext reactContext, boolean enabled) {
+    if (LlamaContext.isArchNotSupported()) {
+      throw new IllegalStateException("Only 64-bit architectures are supported");
+    }
+    if (enabled) {
+      setupLog(new NativeLogCallback(reactContext));
+    } else {
+      unsetLog();
+    }
+  }
+
   private int id;
   private ReactApplicationContext reactContext;
   private long context;
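On the JS side, the event name and `{ level, text }` payload built by `NativeLogCallback` above can be observed with React Native's `DeviceEventEmitter`. A minimal sketch, assuming the package exports a `toggleNativeLog` wrapper matching the Java method of the same name added below in `RNLlama.java` (the export name is an assumption):

```js
// Sketch: forward llama.cpp native logs into the JS console.
// '@RNLlama_onNativeLog' and the { level, text } payload come from the
// NativeLogCallback above; the toggleNativeLog export is assumed.
import { DeviceEventEmitter } from 'react-native'
import { toggleNativeLog } from 'llama.rn'

async function enableLlamaLogs() {
  const sub = DeviceEventEmitter.addListener('@RNLlama_onNativeLog', (ev) => {
    console.log(`[llama.cpp][${ev.level}] ${ev.text}`)
  })
  await toggleNativeLog(true)
  // Call the returned function to stop forwarding and detach the listener.
  return async () => {
    sub.remove()
    await toggleNativeLog(false)
  }
}
```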
@@ -73,7 +99,7 @@ public class LlamaContext {
   }
 
   public LlamaContext(int id, ReactApplicationContext reactContext, ReadableMap params) {
-    if (LlamaContext.
+    if (LlamaContext.isArchNotSupported()) {
       throw new IllegalStateException("Only 64-bit architectures are supported");
     }
     if (!params.hasKey("model")) {
@@ -95,13 +121,17 @@ public class LlamaContext {
         Log.e(NAME, "Failed to convert to FD!");
       }
     }
-
+
     // Check if file has GGUF magic numbers
     this.id = id;
     eventEmitter = reactContext.getJSModule(DeviceEventManagerModule.RCTDeviceEventEmitter.class);
     this.context = initContext(
       // String model,
       modelName,
+      // String chat_template,
+      params.hasKey("chat_template") ? params.getString("chat_template") : "",
+      // String reasoning_format,
+      params.hasKey("reasoning_format") ? params.getString("reasoning_format") : "none",
       // boolean embedding,
       params.hasKey("embedding") ? params.getBoolean("embedding") : false,
       // int embd_normalize,
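The two new `initContext` arguments surface as context-init options in JS. A sketch, assuming the JS option names mirror the native parameter names and defaults (`""` for `chat_template`, `"none"` for `reasoning_format`) shown above:

```js
// Sketch: overriding the chat template and reasoning format at init time.
// The JS-side option names are assumptions mirroring the native params.
import { initLlama } from 'llama.rn'

const context = await initLlama({
  model: '/path/to/model.gguf', // hypothetical path
  chat_template: '',            // '' = keep the template embedded in the GGUF
  reasoning_format: 'none',     // the native default shown above
})
```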
@@ -166,12 +196,24 @@ public class LlamaContext {
     return loadedLibrary;
   }
 
-  public
-
-
-
-
-    return
+  public WritableMap getFormattedChatWithJinja(String messages, String chatTemplate, ReadableMap params) {
+    String jsonSchema = params.hasKey("json_schema") ? params.getString("json_schema") : "";
+    String tools = params.hasKey("tools") ? params.getString("tools") : "";
+    Boolean parallelToolCalls = params.hasKey("parallel_tool_calls") ? params.getBoolean("parallel_tool_calls") : false;
+    String toolChoice = params.hasKey("tool_choice") ? params.getString("tool_choice") : "";
+    return getFormattedChatWithJinja(
+      this.context,
+      messages,
+      chatTemplate == null ? "" : chatTemplate,
+      jsonSchema,
+      tools,
+      parallelToolCalls,
+      toolChoice
+    );
+  }
+
+  public String getFormattedChat(String messages, String chatTemplate) {
+    return getFormattedChat(this.context, messages, chatTemplate == null ? "" : chatTemplate);
   }
 
   private void emitLoadProgress(int progress) {
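Note that the Jinja variant returns a `WritableMap` (a plain object on the JS side) rather than the string `getFormattedChat` returns. A sketch of the assumed JS flow; the exact JS signature and the result field names are assumptions, since the map's keys are populated in `jni.cpp`, which this excerpt does not show:

```js
// Sketch: format a chat via the Jinja path, then reuse the result in
// completion. `messages` and `tools` are assumed to hold the arrays from the
// README example; the prompt/chat_format/grammar fields are assumptions.
const formatted = await context.getFormattedChat(
  JSON.stringify(messages),
  null, // null/'' = use the model's own chat template
  { jinja: true, tools: JSON.stringify(tools), tool_choice: 'auto' },
)

const result = await context.completion({
  prompt: formatted.prompt,
  chat_format: formatted.chat_format, // pairs with the doCompletion param below
  grammar: formatted.grammar,
})
```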
@@ -259,8 +301,18 @@ public class LlamaContext {
       this.context,
       // String prompt,
       params.getString("prompt"),
+      // int chat_format,
+      params.hasKey("chat_format") ? params.getInt("chat_format") : 0,
       // String grammar,
       params.hasKey("grammar") ? params.getString("grammar") : "",
+      // String json_schema,
+      params.hasKey("json_schema") ? params.getString("json_schema") : "",
+      // boolean grammar_lazy,
+      params.hasKey("grammar_lazy") ? params.getBoolean("grammar_lazy") : false,
+      // ReadableArray grammar_triggers,
+      params.hasKey("grammar_triggers") ? params.getArray("grammar_triggers") : null,
+      // ReadableArray preserved_tokens,
+      params.hasKey("preserved_tokens") ? params.getArray("preserved_tokens") : null,
       // float temperature,
      params.hasKey("temperature") ? (float) params.getDouble("temperature") : 0.7f,
       // int n_threads,
@@ -311,6 +363,8 @@ public class LlamaContext {
       params.hasKey("dry_allowed_length") ? params.getInt("dry_allowed_length") : 2,
       // int dry_penalty_last_n,
       params.hasKey("dry_penalty_last_n") ? params.getInt("dry_penalty_last_n") : -1,
+      // float top_n_sigma,
+      params.hasKey("top_n_sigma") ? (float) params.getDouble("top_n_sigma") : -1.0f,
       // String[] dry_sequence_breakers, when undef, we use the default definition from common.h
       params.hasKey("dry_sequence_breakers") ? params.getArray("dry_sequence_breakers").toArrayList().toArray(new String[0]) : new String[]{"\n", ":", "\"", "*"},
       // PartialCompletionCallback partial_completion_callback
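From JS, the new `doCompletion` arguments map onto `completion` params of the same names. A sketch, assuming the keys pass through the bridge unchanged (the `-1.0f` default above leaves `top_n_sigma` disabled):

```js
// Sketch: constrain output with a JSON schema and enable top-n-sigma
// sampling. Key names mirror the native params above; the pass-through
// behavior of the JS bridge is an assumption.
const { text } = await context.completion({
  prompt: 'Describe a cat as JSON.',
  json_schema: JSON.stringify({
    type: 'object',
    properties: { name: { type: 'string' }, age: { type: 'number' } },
    required: ['name'],
  }),
  top_n_sigma: 2.0, // -1.0 (the native default) disables it
})
console.log(text)
```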
@@ -431,15 +485,13 @@ public class LlamaContext {
       // Log.d(NAME, "Loading librnllama_v8_7.so with runtime feature detection");
       // System.loadLibrary("rnllama_v8_7");
     } else if (LlamaContext.isX86_64()) {
-
-
-
+      Log.d(NAME, "Loading librnllama_x86_64.so");
+      System.loadLibrary("rnllama_x86_64");
+      loadedLibrary = "rnllama_x86_64";
     } else {
-
-      System.loadLibrary("rnllama");
-      loadedLibrary = "rnllama";
+      Log.d(NAME, "ARM32 is not supported, skipping loading library");
     }
-  }
+  }
 
   public static boolean isArm64V8a() {
     return Build.SUPPORTED_ABIS[0].equals("arm64-v8a");
@@ -449,6 +501,10 @@ public class LlamaContext {
     return Build.SUPPORTED_ABIS[0].equals("x86_64");
   }
 
+  private static boolean isArchNotSupported() {
+    return isArm64V8a() == false && isX86_64() == false;
+  }
+
   public static String getCpuFeatures() {
     File file = new File("/proc/cpuinfo");
     StringBuilder stringBuilder = new StringBuilder();
@@ -481,6 +537,8 @@ public class LlamaContext {
   );
   protected static native long initContext(
     String model,
+    String chat_template,
+    String reasoning_format,
     boolean embedding,
     int embd_normalize,
     int n_ctx,
@@ -506,9 +564,18 @@ public class LlamaContext {
   protected static native WritableMap loadModelDetails(
     long contextPtr
   );
+  protected static native WritableMap getFormattedChatWithJinja(
+    long contextPtr,
+    String messages,
+    String chatTemplate,
+    String jsonSchema,
+    String tools,
+    boolean parallelToolCalls,
+    String toolChoice
+  );
   protected static native String getFormattedChat(
     long contextPtr,
-
+    String messages,
     String chatTemplate
   );
   protected static native WritableMap loadSession(
@@ -523,7 +590,12 @@ public class LlamaContext {
   protected static native WritableMap doCompletion(
     long context_ptr,
     String prompt,
+    int chat_format,
     String grammar,
+    String json_schema,
+    boolean grammar_lazy,
+    ReadableArray grammar_triggers,
+    ReadableArray preserved_tokens,
     float temperature,
     int n_threads,
     int n_predict,
@@ -549,6 +621,7 @@ public class LlamaContext {
     float dry_base,
     int dry_allowed_length,
     int dry_penalty_last_n,
+    float top_n_sigma,
     String[] dry_sequence_breakers,
     PartialCompletionCallback partial_completion_callback
   );
@@ -567,5 +640,6 @@ public class LlamaContext {
   protected static native void removeLoraAdapters(long contextPtr);
   protected static native WritableArray getLoadedLoraAdapters(long contextPtr);
   protected static native void freeContext(long contextPtr);
-  protected static native void
+  protected static native void setupLog(NativeLogCallback logCallback);
+  protected static native void unsetLog();
 }
package/android/src/main/java/com/rnllama/RNLlama.java
CHANGED
@@ -35,6 +35,32 @@ public class RNLlama implements LifecycleEventListener {
 
   private HashMap<Integer, LlamaContext> contexts = new HashMap<>();
 
+  public void toggleNativeLog(boolean enabled, Promise promise) {
+    new AsyncTask<Void, Void, Boolean>() {
+      private Exception exception;
+
+      @Override
+      protected Boolean doInBackground(Void... voids) {
+        try {
+          LlamaContext.toggleNativeLog(reactContext, enabled);
+          return true;
+        } catch (Exception e) {
+          exception = e;
+        }
+        return null;
+      }
+
+      @Override
+      protected void onPostExecute(Boolean result) {
+        if (exception != null) {
+          promise.reject(exception);
+          return;
+        }
+        promise.resolve(result);
+      }
+    }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+  }
+
   private int llamaContextLimit = -1;
 
   public void setContextLimit(double limit, Promise promise) {
@@ -116,18 +142,25 @@ public class RNLlama implements LifecycleEventListener {
     tasks.put(task, "initContext");
   }
 
-  public void getFormattedChat(double id, final
+  public void getFormattedChat(double id, final String messages, final String chatTemplate, final ReadableMap params, Promise promise) {
     final int contextId = (int) id;
-    AsyncTask task = new AsyncTask<Void, Void,
+    AsyncTask task = new AsyncTask<Void, Void, Object>() {
       private Exception exception;
 
       @Override
-      protected
+      protected Object doInBackground(Void... voids) {
        try {
          LlamaContext context = contexts.get(contextId);
          if (context == null) {
            throw new Exception("Context not found");
          }
+          if (params.hasKey("jinja") && params.getBoolean("jinja")) {
+            ReadableMap result = context.getFormattedChatWithJinja(messages, chatTemplate, params);
+            if (result.hasKey("_error")) {
+              throw new Exception(result.getString("_error"));
+            }
+            return result;
+          }
           return context.getFormattedChat(messages, chatTemplate);
         } catch (Exception e) {
           exception = e;
|
|
136
169
|
}
|
137
170
|
|
138
171
|
@Override
|
139
|
-
protected void onPostExecute(
|
172
|
+
protected void onPostExecute(Object result) {
|
140
173
|
if (exception != null) {
|
141
174
|
promise.reject(exception);
|
142
175
|
return;
|
package/android/src/main/jni-utils.h
CHANGED
@@ -16,6 +16,12 @@ jobject getMap(JNIEnv *env, jobject readableArray, int index) {
   return env->CallObjectMethod(readableArray, getMapMethod, index);
 }
 
+jstring getString(JNIEnv *env, jobject readableArray, int index) {
+  jclass arrayClass = env->GetObjectClass(readableArray);
+  jmethodID getStringMethod = env->GetMethodID(arrayClass, "getString", "(I)Ljava/lang/String;");
+  return (jstring) env->CallObjectMethod(readableArray, getStringMethod, index);
+}
+
 // Other methods not used yet
 
 }