cui-llama.rn 1.4.3 → 1.4.6
This diff shows the changes between publicly released versions of the package as they appear in its public registry, and is provided for informational purposes only.
- package/README.md +93 -114
- package/android/src/main/CMakeLists.txt +5 -0
- package/android/src/main/java/com/rnllama/LlamaContext.java +91 -17
- package/android/src/main/java/com/rnllama/RNLlama.java +37 -4
- package/android/src/main/jni-utils.h +6 -0
- package/android/src/main/jni.cpp +289 -31
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +7 -2
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +7 -2
- package/cpp/chat-template.hpp +529 -0
- package/cpp/chat.cpp +1779 -0
- package/cpp/chat.h +135 -0
- package/cpp/common.cpp +2064 -1873
- package/cpp/common.h +700 -699
- package/cpp/ggml-alloc.c +1039 -1042
- package/cpp/ggml-alloc.h +1 -1
- package/cpp/ggml-backend-impl.h +255 -255
- package/cpp/ggml-backend-reg.cpp +586 -582
- package/cpp/ggml-backend.cpp +2004 -2002
- package/cpp/ggml-backend.h +354 -354
- package/cpp/ggml-common.h +1851 -1853
- package/cpp/ggml-cpp.h +39 -39
- package/cpp/ggml-cpu-aarch64.cpp +4248 -4247
- package/cpp/ggml-cpu-aarch64.h +8 -8
- package/cpp/ggml-cpu-impl.h +531 -386
- package/cpp/ggml-cpu-quants.c +12527 -10920
- package/cpp/ggml-cpu-traits.cpp +36 -36
- package/cpp/ggml-cpu-traits.h +38 -38
- package/cpp/ggml-cpu.c +15766 -14391
- package/cpp/ggml-cpu.cpp +655 -635
- package/cpp/ggml-cpu.h +138 -135
- package/cpp/ggml-impl.h +567 -567
- package/cpp/ggml-metal-impl.h +235 -0
- package/cpp/ggml-metal.h +1 -1
- package/cpp/ggml-metal.m +5146 -4884
- package/cpp/ggml-opt.cpp +854 -854
- package/cpp/ggml-opt.h +216 -216
- package/cpp/ggml-quants.c +5238 -5238
- package/cpp/ggml-threading.h +14 -14
- package/cpp/ggml.c +6529 -6514
- package/cpp/ggml.h +2198 -2194
- package/cpp/gguf.cpp +1329 -1329
- package/cpp/gguf.h +202 -202
- package/cpp/json-schema-to-grammar.cpp +1024 -1045
- package/cpp/json-schema-to-grammar.h +21 -8
- package/cpp/json.hpp +24766 -24766
- package/cpp/llama-adapter.cpp +347 -347
- package/cpp/llama-adapter.h +74 -74
- package/cpp/llama-arch.cpp +1513 -1487
- package/cpp/llama-arch.h +403 -400
- package/cpp/llama-batch.cpp +368 -368
- package/cpp/llama-batch.h +88 -88
- package/cpp/llama-chat.cpp +588 -578
- package/cpp/llama-chat.h +53 -52
- package/cpp/llama-context.cpp +1775 -1775
- package/cpp/llama-context.h +128 -128
- package/cpp/llama-cparams.cpp +1 -1
- package/cpp/llama-cparams.h +37 -37
- package/cpp/llama-cpp.h +30 -30
- package/cpp/llama-grammar.cpp +1219 -1139
- package/cpp/llama-grammar.h +173 -143
- package/cpp/llama-hparams.cpp +71 -71
- package/cpp/llama-hparams.h +139 -139
- package/cpp/llama-impl.cpp +167 -167
- package/cpp/llama-impl.h +61 -61
- package/cpp/llama-kv-cache.cpp +718 -718
- package/cpp/llama-kv-cache.h +219 -218
- package/cpp/llama-mmap.cpp +600 -590
- package/cpp/llama-mmap.h +68 -67
- package/cpp/llama-model-loader.cpp +1124 -1124
- package/cpp/llama-model-loader.h +167 -167
- package/cpp/llama-model.cpp +4087 -3997
- package/cpp/llama-model.h +370 -370
- package/cpp/llama-sampling.cpp +2558 -2408
- package/cpp/llama-sampling.h +32 -32
- package/cpp/llama-vocab.cpp +3264 -3247
- package/cpp/llama-vocab.h +125 -125
- package/cpp/llama.cpp +10284 -10077
- package/cpp/llama.h +1354 -1323
- package/cpp/log.cpp +393 -401
- package/cpp/log.h +132 -121
- package/cpp/minja/chat-template.hpp +529 -0
- package/cpp/minja/minja.hpp +2915 -0
- package/cpp/minja.hpp +2915 -0
- package/cpp/rn-llama.cpp +66 -6
- package/cpp/rn-llama.h +26 -1
- package/cpp/sampling.cpp +570 -505
- package/cpp/sampling.h +3 -0
- package/cpp/sgemm.cpp +2598 -2597
- package/cpp/sgemm.h +14 -14
- package/cpp/speculative.cpp +278 -277
- package/cpp/speculative.h +28 -28
- package/cpp/unicode.cpp +9 -2
- package/ios/CMakeLists.txt +6 -0
- package/ios/RNLlama.h +0 -8
- package/ios/RNLlama.mm +27 -3
- package/ios/RNLlamaContext.h +10 -1
- package/ios/RNLlamaContext.mm +269 -57
- package/jest/mock.js +21 -2
- package/lib/commonjs/NativeRNLlama.js.map +1 -1
- package/lib/commonjs/grammar.js +3 -0
- package/lib/commonjs/grammar.js.map +1 -1
- package/lib/commonjs/index.js +87 -13
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/NativeRNLlama.js.map +1 -1
- package/lib/module/grammar.js +3 -0
- package/lib/module/grammar.js.map +1 -1
- package/lib/module/index.js +86 -13
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeRNLlama.d.ts +107 -2
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/grammar.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +32 -7
- package/lib/typescript/index.d.ts.map +1 -1
- package/llama-rn.podspec +1 -1
- package/package.json +3 -2
- package/src/NativeRNLlama.ts +115 -3
- package/src/grammar.ts +3 -0
- package/src/index.ts +138 -21
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCCompiler.cmake +0 -81
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeSystem.cmake +0 -15
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.c +0 -904
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.cpp +0 -919
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/CMakeConfigureLog.yaml +0 -55
- package/cpp/rn-llama.hpp +0 -913
package/src/NativeRNLlama.ts
CHANGED
@@ -7,6 +7,13 @@ export type NativeEmbeddingParams = {

 export type NativeContextParams = {
   model: string
+  /**
+   * Chat template to override the default one from the model.
+   */
+  chat_template?: string
+
+  reasoning_format?: string
+
   is_model_asset?: boolean
   use_progress_callback?: boolean

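The two context params added above are set at load time. A minimal sketch (not part of the diff; the model path, template string, and reasoning-format value are illustrative, and 'initLlama' is the package's existing loader):

  import { initLlama } from 'cui-llama.rn'

  async function loadWithOverrides() {
    return initLlama({
      model: '/path/to/model.gguf', // illustrative path
      // Overrides the chat template embedded in the GGUF metadata:
      chat_template: '{{- messages[0].content }}', // illustrative Jinja
      // Assumption: a llama.cpp reasoning-format name such as 'deepseek':
      reasoning_format: 'deepseek',
    })
  }
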
@@ -15,7 +22,15 @@ export type NativeContextParams = {
   n_ubatch?: number

   n_threads?: number
+
+  /**
+   * Number of layers to store in VRAM (Currently only for iOS)
+   */
   n_gpu_layers?: number
+  /**
+   * Skip GPU devices (iOS only)
+   */
+  no_gpu_devices?: boolean

   /**
    * Enable flash attention, only recommended in GPU device (Experimental in llama.cpp)
@@ -61,10 +76,28 @@ export type NativeContextParams = {
 export type NativeCompletionParams = {
   prompt: string
   n_threads?: number
+  /**
+   * JSON schema for convert to grammar for structured JSON output.
+   * It will be override by grammar if both are set.
+   */
+  json_schema?: string
   /**
    * Set grammar for grammar-based sampling. Default: no grammar
    */
   grammar?: string
+  /**
+   * Lazy grammar sampling, trigger by grammar_triggers. Default: false
+   */
+  grammar_lazy?: boolean
+  /**
+   * Lazy grammar triggers. Default: []
+   */
+  grammar_triggers?: Array<{
+    at_start: boolean
+    word: string
+  }>
+  preserved_tokens?: Array<string>
+  chat_format?: number
   /**
    * Specify a JSON array of stopping strings.
    * These words will not be included in the completion, so make sure to add them to the prompt for the next iteration. Default: `[]`
@@ -158,6 +191,11 @@ export type NativeCompletionParams = {
    * Specify an array of sequence breakers for DRY sampling. Only a JSON array of strings is accepted. Default: `['\n', ':', '"', '*']`
    */
   dry_sequence_breakers?: Array<string>
+  /**
+   * Top n sigma sampling as described in academic paper "Top-nσ: Not All Logits Are You Need" https://arxiv.org/pdf/2411.07641. Default: `-1.0` (Disabled)
+   */
+  top_n_sigma?: number
+
   /**
    * Ignore end of stream token and continue generating. Default: `false`
    */
@@ -200,7 +238,29 @@ export type NativeCompletionResultTimings = {
 }

 export type NativeCompletionResult = {
+  /**
+   * Original text (Ignored reasoning_content / tool_calls)
+   */
   text: string
+  /**
+   * Reasoning content (parsed for reasoning model)
+   */
+  reasoning_content: string
+  /**
+   * Tool calls
+   */
+  tool_calls: Array<{
+    type: 'function'
+    function: {
+      name: string
+      arguments: string
+    }
+    id?: string
+  }>
+  /**
+   * Content text (Filtered text by reasoning_content / tool_calls)
+   */
+  content: string

   tokens_predicted: number
   tokens_evaluated: number
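The new result fields separate raw model output from parsed reasoning and tool calls. A sketch of consuming them (not part of the diff; assumes NativeCompletionResult is re-exported from the package root like its sibling types):

  import type { NativeCompletionResult } from 'cui-llama.rn'

  function handleResult(result: NativeCompletionResult) {
    // content = text with reasoning / tool-call markup filtered out;
    // text keeps the original output.
    console.log('content:', result.content)
    if (result.reasoning_content) {
      console.log('reasoning:', result.reasoning_content)
    }
    result.tool_calls.forEach((call) => {
      // arguments arrives as a JSON string per the type above.
      const args = JSON.parse(call.function.arguments)
      console.log('tool call:', call.function.name, args)
    })
  }
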
@@ -225,7 +285,38 @@ export type NativeEmbeddingResult = {

 export type NativeLlamaContext = {
   contextId: number
-  model: Object
+  model: {
+    desc: string
+    size: number
+    nEmbd: number
+    nParams: number
+    chatTemplates: {
+      llamaChat: boolean // Chat template in llama-chat.cpp
+      minja: {
+        // Chat template supported by minja.hpp
+        default: boolean
+        defaultCaps: {
+          tools: boolean
+          toolCalls: boolean
+          toolResponses: boolean
+          systemRole: boolean
+          parallelToolCalls: boolean
+          toolCallId: boolean
+        }
+        toolUse: boolean
+        toolUseCaps: {
+          tools: boolean
+          toolCalls: boolean
+          toolResponses: boolean
+          systemRole: boolean
+          parallelToolCalls: boolean
+          toolCallId: boolean
+        }
+      }
+    }
+    metadata: Object
+    isChatTemplateSupported: boolean // Deprecated
+  }
   /**
    * Loaded library name for Android
    */
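The richer model object lets callers probe chat-template capabilities up front. A hedged sketch (not part of the diff; the helper name and import path are illustrative):

  import type { NativeLlamaContext } from './NativeRNLlama' // assumed path

  function canCallTools(model: NativeLlamaContext['model']): boolean {
    const { minja } = model.chatTemplates
    // Either a dedicated tool-use template, or a default template whose
    // capabilities report tool support.
    return minja.toolUse || minja.defaultCaps.tools
  }
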
@@ -250,7 +341,21 @@ export type NativeCPUFeatures = {
   dotprod: boolean
 }

+export type JinjaFormattedChatResult = {
+  prompt: string
+  chat_format?: number
+  grammar?: string
+  grammar_lazy?: boolean
+  grammar_triggers?: Array<{
+    at_start: boolean
+    word: string
+  }>
+  preserved_tokens?: Array<string>
+  additional_stops?: Array<string>
+}
+
 export interface Spec extends TurboModule {
+  toggleNativeLog(enabled: boolean): Promise<void>
   setContextLimit(limit: number): Promise<void>

   modelInfo(path: string, skip?: string[]): Promise<Object>
@@ -261,9 +366,16 @@ export interface Spec extends TurboModule {

   getFormattedChat(
     contextId: number,
-    messages: NativeLlamaChatMessage[],
+    messages: string,
     chatTemplate?: string,
-  ): Promise<string>
+    params?: {
+      jinja?: boolean
+      json_schema?: string
+      tools?: string
+      parallel_tool_calls?: string
+      tool_choice?: string
+    },
+  ): Promise<JinjaFormattedChatResult | string>
   loadSession(
     contextId: number,
     filepath: string,
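Note that getFormattedChat now resolves to either a plain prompt string (the legacy llama-chat path) or a JinjaFormattedChatResult. A small narrowing sketch (not part of the diff):

  import type { JinjaFormattedChatResult } from 'cui-llama.rn'

  function promptOf(res: JinjaFormattedChatResult | string): string {
    if (typeof res === 'string') return res // legacy template path
    // Jinja path: the grammar / trigger fields feed completion params.
    return res.prompt
  }
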
package/src/grammar.ts
CHANGED
package/src/index.ts
CHANGED
@@ -14,6 +14,7 @@ import type {
   NativeEmbeddingParams,
   NativeCompletionTokenProbItem,
   NativeCompletionResultTimings,
+  JinjaFormattedChatResult,
 } from './NativeRNLlama'
 import type {
   SchemaGrammarConverterPropOrder,
@@ -37,6 +38,9 @@ export type {
   NativeCompletionResultTimings,
   RNLlamaMessagePart,
   RNLlamaOAICompatibleMessage,
+  JinjaFormattedChatResult,
+
+  // Deprecated
   SchemaGrammarConverterPropOrder,
   SchemaGrammarConverterBuiltinRule,
 }
@@ -45,6 +49,7 @@ export { SchemaGrammarConverter, convertJsonSchemaToGrammar }

 const EVENT_ON_INIT_CONTEXT_PROGRESS = '@RNLlama_onInitContextProgress'
 const EVENT_ON_TOKEN = '@RNLlama_onToken'
+const EVENT_ON_NATIVE_LOG = '@RNLlama_onNativeLog'

 let EventEmitter: NativeEventEmitter | DeviceEventEmitterStatic
 if (Platform.OS === 'ios') {
@@ -55,6 +60,19 @@ if (Platform.OS === 'android') {
   EventEmitter = DeviceEventEmitter
 }

+const logListeners: Array<(level: string, text: string) => void> = []
+
+// @ts-ignore
+if (EventEmitter) {
+  EventEmitter.addListener(
+    EVENT_ON_NATIVE_LOG,
+    (evt: { level: string; text: string }) => {
+      logListeners.forEach((listener) => listener(evt.level, evt.text))
+    },
+  )
+  RNLlama?.toggleNativeLog?.(false) // Trigger unset to use default log callback
+}
+
 export type TokenData = {
   token: string
   completion_probabilities?: Array<NativeCompletionTokenProb>
@@ -88,14 +106,31 @@ export type ContextParams = Omit<

 export type EmbeddingParams = NativeEmbeddingParams

-export type CompletionParams = Omit<
-  NativeCompletionParams,
-  'emit_partial_completion' | 'prompt'
-> & {
+export type CompletionResponseFormat = {
+  type: 'text' | 'json_object' | 'json_schema'
+  json_schema?: {
+    strict?: boolean
+    schema: object
+  }
+  schema?: object // for json_object type
+}
+
+export type CompletionBaseParams = {
   prompt?: string
   messages?: RNLlamaOAICompatibleMessage[]
-  chatTemplate?: string
+  chatTemplate?: string // deprecated
+  chat_template?: string
+  jinja?: boolean
+  tools?: object
+  parallel_tool_calls?: object
+  tool_choice?: string
+  response_format?: CompletionResponseFormat
 }
+export type CompletionParams = Omit<
+  NativeCompletionParams,
+  'emit_partial_completion' | 'prompt'
+> &
+  CompletionBaseParams

 export type BenchResult = {
   modelDesc: string
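CompletionResponseFormat mirrors the OpenAI-style response_format shapes that the getJsonSchema helper (added just below in this diff) consumes. A sketch of the two JSON modes (not part of the diff; schema contents are illustrative):

  import type { CompletionResponseFormat } from 'cui-llama.rn'

  // json_schema mode: the schema is converted to a grammar for sampling.
  const strict: CompletionResponseFormat = {
    type: 'json_schema',
    json_schema: {
      strict: true,
      schema: { type: 'object', properties: { answer: { type: 'string' } } },
    },
  }

  // json_object mode: schema is optional; {} is used when omitted.
  const loose: CompletionResponseFormat = { type: 'json_object' }
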
@@ -107,6 +142,16 @@ export type BenchResult = {
   tgStd: number
 }

+const getJsonSchema = (responseFormat?: CompletionResponseFormat) => {
+  if (responseFormat?.type === 'json_schema') {
+    return responseFormat.json_schema?.schema
+  }
+  if (responseFormat?.type === 'json_object') {
+    return responseFormat.schema || {}
+  }
+  return null
+}
+
 export class LlamaContext {
   id: number

@@ -114,9 +159,7 @@ export class LlamaContext {

   reasonNoGPU: string = ''

-  model: {
-    isChatTemplateSupported?: boolean
-  } = {}
+  model: NativeLlamaContext['model']

   constructor({ contextId, gpu, reasonNoGPU, model }: NativeLlamaContext) {
     this.id = contextId
@@ -144,27 +187,89 @@ export class LlamaContext {
     return RNLlama.saveSession(this.id, filepath, options?.tokenSize || -1)
   }

+  isLlamaChatSupported(): boolean {
+    return !!this.model.chatTemplates.llamaChat
+  }
+
+  isJinjaSupported(): boolean {
+    const { minja } = this.model.chatTemplates
+    return !!minja?.toolUse || !!minja?.default
+  }
+
   async getFormattedChat(
     messages: RNLlamaOAICompatibleMessage[],
-    template?: string,
-  ): Promise<string> {
+    template?: string | null,
+    params?: {
+      jinja?: boolean
+      response_format?: CompletionResponseFormat
+      tools?: object
+      parallel_tool_calls?: object
+      tool_choice?: string
+    },
+  ): Promise<JinjaFormattedChatResult | string> {
     const chat = formatChat(messages)
-    let tmpl = this.model?.isChatTemplateSupported ? undefined : 'chatml'
+    const useJinja = this.isJinjaSupported() && params?.jinja
+    let tmpl = this.isLlamaChatSupported() || useJinja ? undefined : 'chatml'
     if (template) tmpl = template // Force replace if provided
-    return RNLlama.getFormattedChat(this.id, chat, tmpl)
+    const jsonSchema = getJsonSchema(params?.response_format)
+    return RNLlama.getFormattedChat(this.id, JSON.stringify(chat), tmpl, {
+      jinja: useJinja,
+      json_schema: jsonSchema ? JSON.stringify(jsonSchema) : undefined,
+      tools: params?.tools ? JSON.stringify(params.tools) : undefined,
+      parallel_tool_calls: params?.parallel_tool_calls
+        ? JSON.stringify(params.parallel_tool_calls)
+        : undefined,
+      tool_choice: params?.tool_choice,
+    })
   }

   async completion(
     params: CompletionParams,
     callback?: (data: TokenData) => void,
   ): Promise<NativeCompletionResult> {
-    let finalPrompt = params.prompt
+    const nativeParams = {
+      ...params,
+      prompt: params.prompt || '',
+      emit_partial_completion: !!callback,
+    }
     if (params.messages) {
       // messages always win
-      finalPrompt = await this.getFormattedChat(
+      const formattedResult = await this.getFormattedChat(
         params.messages,
-        params.chatTemplate,
+        params.chat_template || params.chatTemplate,
+        {
+          jinja: params.jinja,
+          tools: params.tools,
+          parallel_tool_calls: params.parallel_tool_calls,
+          tool_choice: params.tool_choice,
+        },
       )
+      if (typeof formattedResult === 'string') {
+        nativeParams.prompt = formattedResult || ''
+      } else {
+        nativeParams.prompt = formattedResult.prompt || ''
+        if (typeof formattedResult.chat_format === 'number')
+          nativeParams.chat_format = formattedResult.chat_format
+        if (formattedResult.grammar)
+          nativeParams.grammar = formattedResult.grammar
+        if (typeof formattedResult.grammar_lazy === 'boolean')
+          nativeParams.grammar_lazy = formattedResult.grammar_lazy
+        if (formattedResult.grammar_triggers)
+          nativeParams.grammar_triggers = formattedResult.grammar_triggers
+        if (formattedResult.preserved_tokens)
+          nativeParams.preserved_tokens = formattedResult.preserved_tokens
+        if (formattedResult.additional_stops) {
+          if (!nativeParams.stop) nativeParams.stop = []
+          nativeParams.stop.push(...formattedResult.additional_stops)
+        }
+      }
+    } else {
+      nativeParams.prompt = params.prompt || ''
+    }
+
+    if (nativeParams.response_format && !nativeParams.grammar) {
+      const jsonSchema = getJsonSchema(params.response_format)
+      if (jsonSchema) nativeParams.json_schema = JSON.stringify(jsonSchema)
     }

     let tokenListener: any =
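Taken together, completion() can now drive OpenAI-style tool calling when jinja is enabled. A hedged sketch (not part of the diff; the tool definition is illustrative and 'ctx' is a loaded LlamaContext):

  // Inside an async function:
  const result = await ctx.completion({
    jinja: true, // use the minja-rendered template
    messages: [{ role: 'user', content: 'Weather in Tokyo?' }],
    tools: [
      {
        type: 'function',
        function: {
          name: 'get_weather', // illustrative tool
          parameters: {
            type: 'object',
            properties: { city: { type: 'string' } },
            required: ['city'],
          },
        },
      },
    ],
    tool_choice: 'auto',
  })
  // Grammar, lazy triggers, preserved tokens, and additional stops returned
  // by the Jinja formatter are applied to the native call by the code above.
  console.log(result.tool_calls)
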
@@ -175,12 +280,9 @@ export class LlamaContext {
         callback(tokenResult)
       })

-    if (!finalPrompt) throw new Error('Prompt is required')
-    const promise = RNLlama.completion(this.id, {
-      ...params,
-      prompt: finalPrompt,
-      emit_partial_completion: !!callback,
-    })
+    if (!nativeParams.prompt) throw new Error('Prompt is required')
+
+    const promise = RNLlama.completion(this.id, nativeParams)
     return promise
       .then((completionResult) => {
         tokenListener?.remove()
@@ -268,6 +370,21 @@ export async function getCpuFeatures() : Promise<NativeCPUFeatures> {
   return RNLlama.getCpuFeatures()
 }

+export async function toggleNativeLog(enabled: boolean): Promise<void> {
+  return RNLlama.toggleNativeLog(enabled)
+}
+
+export function addNativeLogListener(
+  listener: (level: string, text: string) => void,
+): { remove: () => void } {
+  logListeners.push(listener)
+  return {
+    remove: () => {
+      logListeners.splice(logListeners.indexOf(listener), 1)
+    },
+  }
+}
+
 export async function setContextLimit(limit: number): Promise<void> {
   return RNLlama.setContextLimit(limit)
 }
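The new log utilities route llama.cpp's native logs into JS. A short usage sketch (not part of the diff):

  import { toggleNativeLog, addNativeLogListener } from 'cui-llama.rn'

  // Inside an async function:
  await toggleNativeLog(true) // forward native logs as JS events

  const subscription = addNativeLogListener((level, text) => {
    console.log('[llama ' + level + ']', text.trim())
  })

  // Later, to stop listening:
  subscription.remove()
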
package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCCompiler.cmake
DELETED
@@ -1,81 +0,0 @@
-set(CMAKE_C_COMPILER "D:/Android/Sdk/ndk/26.3.11579264/toolchains/llvm/prebuilt/windows-x86_64/bin/clang.exe")
-set(CMAKE_C_COMPILER_ARG1 "")
-set(CMAKE_C_COMPILER_ID "Clang")
-set(CMAKE_C_COMPILER_VERSION "17.0.2")
-set(CMAKE_C_COMPILER_VERSION_INTERNAL "")
-set(CMAKE_C_COMPILER_WRAPPER "")
-set(CMAKE_C_STANDARD_COMPUTED_DEFAULT "17")
-set(CMAKE_C_EXTENSIONS_COMPUTED_DEFAULT "ON")
-set(CMAKE_C_STANDARD_LATEST "")
-set(CMAKE_C_COMPILE_FEATURES "")
-set(CMAKE_C90_COMPILE_FEATURES "")
-set(CMAKE_C99_COMPILE_FEATURES "")
-set(CMAKE_C11_COMPILE_FEATURES "")
-set(CMAKE_C17_COMPILE_FEATURES "")
-set(CMAKE_C23_COMPILE_FEATURES "")
-
-set(CMAKE_C_PLATFORM_ID "Linux")
-set(CMAKE_C_SIMULATE_ID "")
-set(CMAKE_C_COMPILER_FRONTEND_VARIANT "GNU")
-set(CMAKE_C_SIMULATE_VERSION "")
-
-
-
-
-set(CMAKE_AR "D:/Android/Sdk/ndk/26.3.11579264/toolchains/llvm/prebuilt/windows-x86_64/bin/llvm-ar.exe")
-set(CMAKE_C_COMPILER_AR "D:/Android/Sdk/ndk/26.3.11579264/toolchains/llvm/prebuilt/windows-x86_64/bin/llvm-ar.exe")
-set(CMAKE_RANLIB "D:/Android/Sdk/ndk/26.3.11579264/toolchains/llvm/prebuilt/windows-x86_64/bin/llvm-ranlib.exe")
-set(CMAKE_C_COMPILER_RANLIB "D:/Android/Sdk/ndk/26.3.11579264/toolchains/llvm/prebuilt/windows-x86_64/bin/llvm-ranlib.exe")
-set(CMAKE_LINKER "D:/Android/Sdk/ndk/26.3.11579264/toolchains/llvm/prebuilt/windows-x86_64/bin/ld.lld.exe")
-set(CMAKE_LINKER_LINK "")
-set(CMAKE_LINKER_LLD "")
-set(CMAKE_C_COMPILER_LINKER "")
-set(CMAKE_C_COMPILER_LINKER_ID "")
-set(CMAKE_C_COMPILER_LINKER_VERSION )
-set(CMAKE_C_COMPILER_LINKER_FRONTEND_VARIANT )
-set(CMAKE_MT "")
-set(CMAKE_TAPI "CMAKE_TAPI-NOTFOUND")
-set(CMAKE_COMPILER_IS_GNUCC )
-set(CMAKE_C_COMPILER_LOADED 1)
-set(CMAKE_C_COMPILER_WORKS )
-set(CMAKE_C_ABI_COMPILED )
-
-set(CMAKE_C_COMPILER_ENV_VAR "CC")
-
-set(CMAKE_C_COMPILER_ID_RUN 1)
-set(CMAKE_C_SOURCE_FILE_EXTENSIONS c;m)
-set(CMAKE_C_IGNORE_EXTENSIONS h;H;o;O;obj;OBJ;def;DEF;rc;RC)
-set(CMAKE_C_LINKER_PREFERENCE 10)
-set(CMAKE_C_LINKER_DEPFILE_SUPPORTED )
-
-# Save compiler ABI information.
-set(CMAKE_C_SIZEOF_DATA_PTR "")
-set(CMAKE_C_COMPILER_ABI "")
-set(CMAKE_C_BYTE_ORDER "")
-set(CMAKE_C_LIBRARY_ARCHITECTURE "")
-
-if(CMAKE_C_SIZEOF_DATA_PTR)
-  set(CMAKE_SIZEOF_VOID_P "${CMAKE_C_SIZEOF_DATA_PTR}")
-endif()
-
-if(CMAKE_C_COMPILER_ABI)
-  set(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_C_COMPILER_ABI}")
-endif()
-
-if(CMAKE_C_LIBRARY_ARCHITECTURE)
-  set(CMAKE_LIBRARY_ARCHITECTURE "")
-endif()
-
-set(CMAKE_C_CL_SHOWINCLUDES_PREFIX "")
-if(CMAKE_C_CL_SHOWINCLUDES_PREFIX)
-  set(CMAKE_CL_SHOWINCLUDES_PREFIX "${CMAKE_C_CL_SHOWINCLUDES_PREFIX}")
-endif()
-
-
-
-
-
-set(CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES "")
-set(CMAKE_C_IMPLICIT_LINK_LIBRARIES "")
-set(CMAKE_C_IMPLICIT_LINK_DIRECTORIES "")
-set(CMAKE_C_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "")
package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeSystem.cmake
DELETED
@@ -1,15 +0,0 @@
-set(CMAKE_HOST_SYSTEM "Windows-10.0.26120")
-set(CMAKE_HOST_SYSTEM_NAME "Windows")
-set(CMAKE_HOST_SYSTEM_VERSION "10.0.26120")
-set(CMAKE_HOST_SYSTEM_PROCESSOR "AMD64")
-
-include("D:/Android/Sdk/ndk/26.3.11579264/build/cmake/android.toolchain.cmake")
-
-set(CMAKE_SYSTEM "Android-1")
-set(CMAKE_SYSTEM_NAME "Android")
-set(CMAKE_SYSTEM_VERSION "1")
-set(CMAKE_SYSTEM_PROCESSOR "aarch64")
-
-set(CMAKE_CROSSCOMPILING "TRUE")
-
-set(CMAKE_SYSTEM_LOADED 1)