cui-llama.rn 1.4.2 → 1.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +93 -114
- package/android/src/main/CMakeLists.txt +5 -0
- package/android/src/main/build-arm64/CMakeCache.txt +429 -0
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCCompiler.cmake +81 -0
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCXXCompiler.cmake +101 -0
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_C.bin +0 -0
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_CXX.bin +0 -0
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeSystem.cmake +15 -0
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.c +904 -0
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.cpp +919 -0
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/CMakeConfigureLog.yaml +431 -0
- package/android/src/main/build-arm64/CMakeFiles/CMakeDirectoryInformation.cmake +16 -0
- package/android/src/main/build-arm64/CMakeFiles/Makefile.cmake +165 -0
- package/android/src/main/build-arm64/CMakeFiles/Makefile2 +297 -0
- package/android/src/main/build-arm64/CMakeFiles/Progress/1 +1 -0
- package/android/src/main/build-arm64/CMakeFiles/Progress/2 +1 -0
- package/android/src/main/build-arm64/CMakeFiles/Progress/3 +1 -0
- package/android/src/main/build-arm64/CMakeFiles/Progress/4 +1 -0
- package/android/src/main/build-arm64/CMakeFiles/Progress/5 +1 -0
- package/android/src/main/build-arm64/CMakeFiles/Progress/6 +1 -0
- package/android/src/main/build-arm64/CMakeFiles/Progress/count.txt +1 -0
- package/android/src/main/build-arm64/CMakeFiles/TargetDirectories.txt +8 -0
- package/android/src/main/build-arm64/CMakeFiles/cmake.check_cache +1 -0
- package/android/src/main/build-arm64/CMakeFiles/progress.marks +1 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o.d +58 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o.d +756 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o.d +709 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o.d +714 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o.d +62 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o.d +708 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o.d +113 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o.d +713 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o.d +763 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o.d +61 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o.d +707 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o.d +104 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o.d +714 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o.d +723 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/DependInfo.cmake +62 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/build.make +722 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/cmake_clean.cmake +89 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.ts +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/flags.make +17 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/progress.make +41 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/DependInfo.cmake +62 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/build.make +722 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/cmake_clean.cmake +89 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.ts +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/flags.make +17 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/progress.make +41 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/DependInfo.cmake +62 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/build.make +722 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/cmake_clean.cmake +89 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.ts +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/flags.make +17 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/progress.make +41 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/DependInfo.cmake +62 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/build.make +722 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/cmake_clean.cmake +89 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.ts +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/flags.make +17 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/progress.make +41 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/DependInfo.cmake +62 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/build.make +722 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/cmake_clean.cmake +89 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.ts +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/flags.make +17 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/progress.make +41 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/DependInfo.cmake +62 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/build.make +722 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/cmake_clean.cmake +89 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.ts +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/flags.make +17 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/progress.make +41 -0
- package/android/src/main/build-arm64/Makefile +1862 -0
- package/android/src/main/build-arm64/cmake_install.cmake +66 -0
- package/android/src/main/java/com/rnllama/LlamaContext.java +92 -18
- package/android/src/main/java/com/rnllama/RNLlama.java +37 -4
- package/android/src/main/jni-utils.h +6 -0
- package/android/src/main/jni.cpp +287 -31
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +7 -2
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +7 -2
- package/cpp/chat-template.hpp +529 -0
- package/cpp/chat.cpp +1085 -0
- package/cpp/chat.hpp +55 -0
- package/cpp/common.cpp +159 -36
- package/cpp/common.h +64 -19
- package/cpp/ggml-alloc.c +1 -13
- package/cpp/ggml-common.h +0 -2
- package/cpp/ggml-cpu-impl.h +6 -12
- package/cpp/ggml-cpu-quants.c +937 -340
- package/cpp/ggml-cpu.c +207 -113
- package/cpp/ggml-cpu.cpp +4 -6
- package/cpp/ggml-cpu.h +1 -1
- package/cpp/ggml-metal.h +66 -66
- package/cpp/ggml-metal.m +141 -23
- package/cpp/ggml.c +24 -14
- package/cpp/ggml.h +2 -2
- package/cpp/json-schema-to-grammar.cpp +46 -66
- package/cpp/json-schema-to-grammar.h +15 -1
- package/cpp/llama-arch.cpp +7 -2
- package/cpp/llama-arch.h +3 -1
- package/cpp/llama-chat.cpp +10 -1
- package/cpp/llama-chat.h +1 -0
- package/cpp/llama-grammar.cpp +86 -6
- package/cpp/llama-grammar.h +22 -1
- package/cpp/llama-impl.h +6 -6
- package/cpp/llama-kv-cache.h +1 -1
- package/cpp/llama-mmap.h +1 -0
- package/cpp/llama-model-loader.cpp +1 -1
- package/cpp/llama-model.cpp +32 -6
- package/cpp/llama-sampling.cpp +178 -61
- package/cpp/llama-vocab.cpp +8 -3
- package/cpp/llama.cpp +188 -128
- package/cpp/llama.h +27 -10
- package/cpp/log.cpp +32 -10
- package/cpp/log.h +12 -1
- package/cpp/minja.hpp +2883 -0
- package/cpp/rn-llama.cpp +82 -5
- package/cpp/rn-llama.h +16 -1
- package/cpp/sampling.cpp +68 -41
- package/cpp/sampling.h +3 -0
- package/cpp/sgemm.cpp +9 -8
- package/cpp/unicode.cpp +9 -2
- package/ios/CMakeLists.txt +6 -0
- package/ios/RNLlama.h +0 -8
- package/ios/RNLlama.mm +27 -3
- package/ios/RNLlamaContext.h +10 -1
- package/ios/RNLlamaContext.mm +269 -57
- package/jest/mock.js +21 -2
- package/lib/commonjs/NativeRNLlama.js.map +1 -1
- package/lib/commonjs/grammar.js +3 -0
- package/lib/commonjs/grammar.js.map +1 -1
- package/lib/commonjs/index.js +87 -13
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/NativeRNLlama.js.map +1 -1
- package/lib/module/grammar.js +3 -0
- package/lib/module/grammar.js.map +1 -1
- package/lib/module/index.js +86 -13
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeRNLlama.d.ts +107 -2
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/grammar.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +32 -7
- package/lib/typescript/index.d.ts.map +1 -1
- package/llama-rn.podspec +1 -1
- package/package.json +2 -2
- package/src/NativeRNLlama.ts +115 -3
- package/src/grammar.ts +3 -0
- package/src/index.ts +138 -21
package/README.md
CHANGED
@@ -76,7 +76,7 @@ const context = await initLlama({
|
|
76
76
|
model: modelPath,
|
77
77
|
use_mlock: true,
|
78
78
|
n_ctx: 2048,
|
79
|
-
n_gpu_layers:
|
79
|
+
n_gpu_layers: 99, // number of layers to store in VRAM (Currently only for iOS)
|
80
80
|
// embedding: true, // use embedding
|
81
81
|
})
|
82
82
|
|
@@ -141,148 +141,127 @@ Please visit the [Documentation](docs/API) for more details.
|
|
141
141
|
|
142
142
|
You can also visit the [example](example) to see how to use it.
|
143
143
|
|
144
|
-
##
|
144
|
+
## Tool Calling
|
145
145
|
|
146
|
-
|
146
|
+
`llama.rn` has universal tool call support by using [minja](https://github.com/google/minja) (as the Jinja template parser) and [chat.cpp](https://github.com/ggerganov/llama.cpp/blob/master/common/chat.cpp) from llama.cpp.
|
147
147
|
|
148
|
-
|
149
|
-
|
150
|
-
`llama.rn` provided a built-in function to convert JSON Schema to GBNF:
|
148
|
+
Example:
|
151
149
|
|
152
150
|
```js
|
153
|
-
import { initLlama
|
154
|
-
|
155
|
-
const schema = {
|
156
|
-
/* JSON Schema, see below */
|
157
|
-
}
|
151
|
+
import { initLlama } from 'llama.rn'
|
158
152
|
|
159
153
|
const context = await initLlama({
|
160
|
-
|
161
|
-
use_mlock: true,
|
162
|
-
n_ctx: 2048,
|
163
|
-
n_gpu_layers: 1, // > 0: enable Metal on iOS
|
164
|
-
// embedding: true, // use embedding
|
165
|
-
grammar: convertJsonSchemaToGrammar({
|
166
|
-
schema,
|
167
|
-
propOrder: { function: 0, arguments: 1 },
|
168
|
-
}),
|
169
|
-
})
|
170
|
-
|
171
|
-
const { text } = await context.completion({
|
172
|
-
prompt: 'Schedule a birthday party on Aug 14th 2023 at 8pm.',
|
154
|
+
// ...params
|
173
155
|
})
|
174
|
-
console.log('Result:', text)
|
175
|
-
// Example output:
|
176
|
-
// {"function": "create_event","arguments":{"date": "Aug 14th 2023", "time": "8pm", "title": "Birthday Party"}}
|
177
|
-
```
|
178
156
|
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
oneOf: [
|
157
|
+
const { text, tool_calls } = await context.completion({
|
158
|
+
// ...params
|
159
|
+
jinja: true, // Enable Jinja template parser
|
160
|
+
tool_choice: 'auto',
|
161
|
+
tools: [
|
185
162
|
{
|
186
|
-
type: '
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
},
|
193
|
-
arguments: {
|
163
|
+
type: 'function',
|
164
|
+
function: {
|
165
|
+
name: 'ipython',
|
166
|
+
description:
|
167
|
+
'Runs code in an ipython interpreter and returns the result of the execution after 60 seconds.',
|
168
|
+
parameters: {
|
194
169
|
type: 'object',
|
195
170
|
properties: {
|
196
|
-
|
171
|
+
code: {
|
197
172
|
type: 'string',
|
198
|
-
description: 'The
|
199
|
-
},
|
200
|
-
unit: {
|
201
|
-
type: 'string',
|
202
|
-
enum: ['celsius', 'fahrenheit'],
|
173
|
+
description: 'The code to run in the ipython interpreter.',
|
203
174
|
},
|
204
175
|
},
|
205
|
-
required: ['
|
176
|
+
required: ['code'],
|
206
177
|
},
|
207
178
|
},
|
208
179
|
},
|
180
|
+
],
|
181
|
+
messages: [
|
209
182
|
{
|
210
|
-
|
211
|
-
|
212
|
-
description: 'Create a calendar event',
|
213
|
-
properties: {
|
214
|
-
function: {
|
215
|
-
const: 'create_event',
|
216
|
-
},
|
217
|
-
arguments: {
|
218
|
-
type: 'object',
|
219
|
-
properties: {
|
220
|
-
title: {
|
221
|
-
type: 'string',
|
222
|
-
description: 'The title of the event',
|
223
|
-
},
|
224
|
-
date: {
|
225
|
-
type: 'string',
|
226
|
-
description: 'The date of the event',
|
227
|
-
},
|
228
|
-
time: {
|
229
|
-
type: 'string',
|
230
|
-
description: 'The time of the event',
|
231
|
-
},
|
232
|
-
},
|
233
|
-
required: ['title', 'date', 'time'],
|
234
|
-
},
|
235
|
-
},
|
183
|
+
role: 'system',
|
184
|
+
content: 'You are a helpful assistant that can answer questions and help with tasks.',
|
236
185
|
},
|
237
186
|
{
|
238
|
-
|
239
|
-
|
240
|
-
description: 'Search for an image',
|
241
|
-
properties: {
|
242
|
-
function: {
|
243
|
-
const: 'image_search',
|
244
|
-
},
|
245
|
-
arguments: {
|
246
|
-
type: 'object',
|
247
|
-
properties: {
|
248
|
-
query: {
|
249
|
-
type: 'string',
|
250
|
-
description: 'The search query',
|
251
|
-
},
|
252
|
-
},
|
253
|
-
required: ['query'],
|
254
|
-
},
|
255
|
-
},
|
187
|
+
role: 'user',
|
188
|
+
content: 'Test',
|
256
189
|
},
|
257
190
|
],
|
258
|
-
}
|
191
|
+
})
|
192
|
+
console.log('Result:', text)
|
193
|
+
// If tool_calls is not empty, it means the model has called the tool
|
194
|
+
if (tool_calls) console.log('Tool Calls:', tool_calls)
|
259
195
|
```
|
260
196
|
|
261
|
-
|
197
|
+
You can check [chat.cpp](https://github.com/ggerganov/llama.cpp/blob/6eecde3cc8fda44da7794042e3668de4af3c32c6/common/chat.cpp#L7-L23) for the models that have native tool calling support; any other model falls back to the `GENERIC` tool call type.
|
262
198
|
|
263
|
-
|
264
|
-
<summary>Converted GBNF looks like</summary>
|
199
|
+
The generic tool call always produces a JSON object as output; the output looks like `{"response": "..."}` when the model decides not to use a tool call.
|
265
200
|
|
201
|
+
## Grammar Sampling
|
202
|
+
|
203
|
+
GBNF (GGML BNF) is a format for defining [formal grammars](https://en.wikipedia.org/wiki/Formal_grammar) to constrain model outputs in `llama.cpp`. For example, you can use it to force the model to generate valid JSON, or speak only in emojis.
|
204
|
+
|
205
|
+
You can see [GBNF Guide](https://github.com/ggerganov/llama.cpp/tree/master/grammars) for more details.
|
206
|
+
|
207
|
+
`llama.rn` provided a built-in function to convert JSON Schema to GBNF:
|
208
|
+
|
209
|
+
Example gbnf grammar:
|
266
210
|
```bnf
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
211
|
+
root ::= object
|
212
|
+
value ::= object | array | string | number | ("true" | "false" | "null") ws
|
213
|
+
|
214
|
+
object ::=
|
215
|
+
"{" ws (
|
216
|
+
string ":" ws value
|
217
|
+
("," ws string ":" ws value)*
|
218
|
+
)? "}" ws
|
219
|
+
|
220
|
+
array ::=
|
221
|
+
"[" ws (
|
222
|
+
value
|
223
|
+
("," ws value)*
|
224
|
+
)? "]" ws
|
225
|
+
|
226
|
+
string ::=
|
227
|
+
"\"" (
|
228
|
+
[^"\\\x7F\x00-\x1F] |
|
229
|
+
"\\" (["\\bfnrt] | "u" [0-9a-fA-F]{4}) # escapes
|
230
|
+
)* "\"" ws
|
231
|
+
|
232
|
+
number ::= ("-"? ([0-9] | [1-9] [0-9]{0,15})) ("." [0-9]+)? ([eE] [-+]? [0-9] [1-9]{0,15})? ws
|
233
|
+
|
234
|
+
# Optional space: by convention, applied in this grammar after literal chars when allowed
|
235
|
+
ws ::= | " " | "\n" [ \t]{0,20}
|
236
|
+
```
|
237
|
+
|
238
|
+
```js
|
239
|
+
import { initLlama } from 'llama.rn'
|
240
|
+
|
241
|
+
const gbnf = '...'
|
242
|
+
|
243
|
+
const context = await initLlama({
|
244
|
+
// ...params
|
245
|
+
grammar: gbnf,
|
246
|
+
})
|
247
|
+
|
248
|
+
const { text } = await context.completion({
|
249
|
+
// ...params
|
250
|
+
messages: [
|
251
|
+
{
|
252
|
+
role: 'system',
|
253
|
+
content: 'You are a helpful assistant that can answer questions and help with tasks.',
|
254
|
+
},
|
255
|
+
{
|
256
|
+
role: 'user',
|
257
|
+
content: 'Test',
|
258
|
+
},
|
259
|
+
],
|
260
|
+
})
|
261
|
+
console.log('Result:', text)
|
283
262
|
```
|
284
263
|
|
285
|
-
|
264
|
+
This is also how `json_schema` in `response_format` works during completion: it converts the `json_schema` to a GBNF grammar.
|
286
265
|
|
287
266
|
## Mock `llama.rn`
|
288
267
|
|
@@ -51,6 +51,11 @@ set(
|
|
51
51
|
${RNLLAMA_LIB_DIR}/unicode.cpp
|
52
52
|
${RNLLAMA_LIB_DIR}/sgemm.cpp
|
53
53
|
${RNLLAMA_LIB_DIR}/common.cpp
|
54
|
+
${RNLLAMA_LIB_DIR}/chat.cpp
|
55
|
+
${RNLLAMA_LIB_DIR}/chat-template.hpp
|
56
|
+
${RNLLAMA_LIB_DIR}/json-schema-to-grammar.cpp
|
57
|
+
${RNLLAMA_LIB_DIR}/minja.hpp
|
58
|
+
${RNLLAMA_LIB_DIR}/json.hpp
|
54
59
|
${RNLLAMA_LIB_DIR}/rn-llama.cpp
|
55
60
|
${CMAKE_SOURCE_DIR}/jni-utils.h
|
56
61
|
${CMAKE_SOURCE_DIR}/jni.cpp
|