cui-llama.rn 1.4.6 → 1.5.0
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/android/src/main/CMakeLists.txt +9 -2
- package/android/src/main/jni.cpp +52 -34
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/cpp/binary-ops.cpp +158 -0
- package/cpp/binary-ops.h +16 -0
- package/cpp/chat.cpp +1769 -1779
- package/cpp/chat.h +9 -1
- package/cpp/common.cpp +20 -522
- package/cpp/common.h +13 -36
- package/cpp/cpu-common.h +72 -0
- package/cpp/ggml-common.h +12 -6
- package/cpp/ggml-cpu-aarch64.cpp +1557 -80
- package/cpp/ggml-cpu-impl.h +2 -21
- package/cpp/ggml-cpu-quants.c +904 -405
- package/cpp/ggml-cpu.c +909 -13237
- package/cpp/ggml-impl.h +50 -23
- package/cpp/ggml-metal-impl.h +77 -3
- package/cpp/ggml-metal.m +794 -580
- package/cpp/ggml.c +92 -3
- package/cpp/ggml.h +29 -5
- package/cpp/gguf.cpp +1 -0
- package/cpp/llama-adapter.cpp +55 -20
- package/cpp/llama-adapter.h +11 -9
- package/cpp/llama-arch.cpp +217 -16
- package/cpp/llama-arch.h +25 -0
- package/cpp/llama-batch.h +2 -2
- package/cpp/llama-chat.cpp +54 -2
- package/cpp/llama-chat.h +3 -0
- package/cpp/llama-context.cpp +2294 -1238
- package/cpp/llama-context.h +214 -77
- package/cpp/llama-cparams.h +1 -0
- package/cpp/llama-graph.cpp +1695 -0
- package/cpp/llama-graph.h +592 -0
- package/cpp/llama-hparams.cpp +8 -0
- package/cpp/llama-hparams.h +17 -0
- package/cpp/llama-io.cpp +15 -0
- package/cpp/llama-io.h +35 -0
- package/cpp/llama-kv-cache.cpp +965 -303
- package/cpp/llama-kv-cache.h +145 -151
- package/cpp/llama-memory.cpp +1 -0
- package/cpp/llama-memory.h +21 -0
- package/cpp/llama-mmap.cpp +1 -1
- package/cpp/llama-model-loader.cpp +10 -5
- package/cpp/llama-model-loader.h +5 -3
- package/cpp/llama-model.cpp +9194 -201
- package/cpp/llama-model.h +40 -1
- package/cpp/llama-sampling.cpp +5 -0
- package/cpp/llama-vocab.cpp +36 -5
- package/cpp/llama.cpp +51 -9984
- package/cpp/llama.h +102 -22
- package/cpp/log.cpp +34 -0
- package/cpp/minja/chat-template.hpp +15 -7
- package/cpp/minja/minja.hpp +120 -94
- package/cpp/ops.cpp +8723 -0
- package/cpp/ops.h +128 -0
- package/cpp/rn-llama.cpp +44 -53
- package/cpp/rn-llama.h +2 -12
- package/cpp/sampling.cpp +3 -0
- package/cpp/sgemm.cpp +533 -88
- package/cpp/simd-mappings.h +888 -0
- package/cpp/speculative.cpp +4 -4
- package/cpp/unary-ops.cpp +186 -0
- package/cpp/unary-ops.h +28 -0
- package/cpp/vec.cpp +258 -0
- package/cpp/vec.h +802 -0
- package/ios/CMakeLists.txt +5 -2
- package/ios/RNLlama.mm +2 -2
- package/ios/RNLlamaContext.mm +40 -24
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +6 -4
- package/src/index.ts +3 -1
- package/cpp/chat-template.hpp +0 -529
- package/cpp/minja.hpp +0 -2915
package/ios/CMakeLists.txt
CHANGED
@@ -62,15 +62,18 @@ add_library(rnllama SHARED
     ${SOURCE_DIR}/llama-model-loader.cpp
     ${SOURCE_DIR}/llama-mmap.cpp
     ${SOURCE_DIR}/llama-vocab.cpp
+    ${SOURCE_DIR}/llama-memory.cpp
+    ${SOURCE_DIR}/llama-io.cpp
+    ${SOURCE_DIR}/llama-graph.cpp
     ${SOURCE_DIR}/sampling.cpp
     ${SOURCE_DIR}/unicode-data.cpp
     ${SOURCE_DIR}/unicode.cpp
     ${SOURCE_DIR}/sgemm.cpp
     ${SOURCE_DIR}/common.cpp
     ${SOURCE_DIR}/chat.cpp
-    ${SOURCE_DIR}/chat-template.hpp
     ${SOURCE_DIR}/json-schema-to-grammar.cpp
-    ${SOURCE_DIR}/minja.hpp
+    ${SOURCE_DIR}/minja/minja.hpp
+    ${SOURCE_DIR}/minja/chat-template.hpp
     ${SOURCE_DIR}/json.hpp
     ${SOURCE_DIR}/amx/amx.cpp
     ${SOURCE_DIR}/amx/mmq.cpp
package/ios/RNLlama.mm
CHANGED
@@ -102,8 +102,8 @@ RCT_EXPORT_METHOD(getFormattedChat:(double)contextId
     if ([params[@"jinja"] boolValue]) {
       NSString *jsonSchema = params[@"json_schema"];
       NSString *tools = params[@"tools"];
-
-      NSString *toolChoice = params[@"tool_choice"]
+      bool parallelToolCalls = [params[@"parallel_tool_calls"] boolValue];
+      NSString *toolChoice = params[@"tool_choice"];
       resolve([context getFormattedChatWithJinja:messages withChatTemplate:chatTemplate withJsonSchema:jsonSchema withTools:tools withParallelToolCalls:parallelToolCalls withToolChoice:toolChoice]);
     } else {
       resolve([context getFormattedChat:messages withChatTemplate:chatTemplate]);
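Note on the getFormattedChat change above: the native side now reads a parallel_tool_calls flag alongside the existing jinja, json_schema, tools, and tool_choice keys. A minimal sketch of the params object the JS caller would pass when Jinja formatting is enabled; the object literal and its values are illustrative, only the key names come from this diff.

// Sketch only: params consumed by getFormattedChat when `jinja` is true.
const formatParams = {
  jinja: true,
  json_schema: JSON.stringify({ type: 'object', properties: { answer: { type: 'string' } } }),
  tools: JSON.stringify([{ type: 'function', function: { name: 'get_weather', parameters: {} } }]),
  parallel_tool_calls: true, // newly read via params[@"parallel_tool_calls"] in 1.5.0
  tool_choice: 'auto',
}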
package/ios/RNLlamaContext.mm
CHANGED
@@ -285,7 +285,7 @@
       [meta setValue:valStr forKey:keyStr];
     }
 
-    auto template_tool_use = llama->templates.template_tool_use.get();
+    auto template_tool_use = llama->templates.get()->template_tool_use.get();
     NSDictionary *tool_use_caps_dir = nil;
     if (template_tool_use) {
       auto tool_use_caps = template_tool_use->original_caps();
@@ -299,7 +299,7 @@
       };
     }
 
-    auto default_tmpl = llama->templates.template_default.get();
+    auto default_tmpl = llama->templates.get()->template_default.get();
     auto default_tmpl_caps = default_tmpl->original_caps();
 
     return @{
@@ -356,15 +356,16 @@
       parallelToolCalls,
       toolChoice == nil ? "" : [toolChoice UTF8String]
     );
-    result[@"prompt"] = [NSString stringWithUTF8String:chatParams.prompt.
+    result[@"prompt"] = [NSString stringWithUTF8String:chatParams.prompt.c_str()];
     result[@"chat_format"] = @(static_cast<int>(chatParams.format));
     result[@"grammar"] = [NSString stringWithUTF8String:chatParams.grammar.c_str()];
     result[@"grammar_lazy"] = @(chatParams.grammar_lazy);
     NSMutableArray *grammar_triggers = [[NSMutableArray alloc] init];
     for (const auto & trigger : chatParams.grammar_triggers) {
       [grammar_triggers addObject:@{
-        @"
-        @"
+        @"type": @(trigger.type),
+        @"value": [NSString stringWithUTF8String:trigger.value.c_str()],
+        @"token": @(trigger.token),
       }];
     }
     result[@"grammar_triggers"] = grammar_triggers;
@@ -483,25 +484,6 @@
       sparams.grammar_lazy = [params[@"grammar_lazy"] boolValue];
     }
 
-    if (params[@"grammar_triggers"] && [params[@"grammar_triggers"] isKindOfClass:[NSArray class]]) {
-      NSArray *grammar_triggers = params[@"grammar_triggers"];
-      for (NSDictionary *grammar_trigger in grammar_triggers) {
-        common_grammar_trigger trigger;
-        trigger.word = [grammar_trigger[@"word"] UTF8String];
-        trigger.at_start = [grammar_trigger[@"at_start"] boolValue];
-
-        auto ids = common_tokenize(llama->ctx, trigger.word, /* add_special= */ false, /* parse_special= */ true);
-        if (ids.size() == 1) {
-          // LOG_DBG("Grammar trigger token: %d (`%s`)\n", ids[0], trigger.word.c_str());
-          sparams.grammar_trigger_tokens.push_back(ids[0]);
-          sparams.preserved_tokens.insert(ids[0]);
-          continue;
-        }
-        // LOG_DBG("Grammar trigger word: `%s`\n", trigger.word.c_str());
-        sparams.grammar_trigger_words.push_back(trigger);
-      }
-    }
-
     if (params[@"preserved_tokens"] && [params[@"preserved_tokens"] isKindOfClass:[NSArray class]]) {
       NSArray *preserved_tokens = params[@"preserved_tokens"];
       for (NSString *token in preserved_tokens) {
@@ -514,6 +496,40 @@
       }
     }
 
+    if (params[@"grammar_triggers"] && [params[@"grammar_triggers"] isKindOfClass:[NSArray class]]) {
+      NSArray *grammar_triggers = params[@"grammar_triggers"];
+      for (NSDictionary *grammar_trigger in grammar_triggers) {
+        const auto type = static_cast<common_grammar_trigger_type>([grammar_trigger[@"type"] intValue]);
+        const auto & word = [grammar_trigger[@"value"] UTF8String];
+
+        if (type == COMMON_GRAMMAR_TRIGGER_TYPE_WORD) {
+          auto ids = common_tokenize(llama->ctx, word, /* add_special= */ false, /* parse_special= */ true);
+          if (ids.size() == 1) {
+            auto token = ids[0];
+            if (std::find(sparams.preserved_tokens.begin(), sparams.preserved_tokens.end(), (llama_token) token) == sparams.preserved_tokens.end()) {
+              throw std::runtime_error("Grammar trigger word should be marked as preserved token");
+            }
+            common_grammar_trigger trigger;
+            trigger.type = COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN;
+            trigger.value = word;
+            trigger.token = token;
+            sparams.grammar_triggers.push_back(std::move(trigger));
+          } else {
+            sparams.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, word});
+          }
+        } else {
+          common_grammar_trigger trigger;
+          trigger.type = type;
+          trigger.value = word;
+          if (type == COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN) {
+            const auto token = (llama_token) [grammar_trigger[@"token"] intValue];
+            trigger.token = token;
+          }
+          sparams.grammar_triggers.push_back(std::move(trigger));
+        }
+      }
+    }
+
     llama->params.antiprompt.clear();
     if (params[@"stop"]) {
       NSArray *stop = params[@"stop"];
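The rewritten trigger parsing above expects the new { type, value, token } shape and, for a word trigger that tokenizes to a single token, requires the token to already be in preserved_tokens (otherwise it throws "Grammar trigger word should be marked as preserved token"). A hedged sketch of completion params that satisfies that check; the numeric type value is an assumption about common_grammar_trigger_type, not something this diff states.

// Sketch only: completion params using the 1.5.0 grammar trigger shape.
const GRAMMAR_TRIGGER_TYPE_WORD = 1 // assumed enum value; check common_grammar_trigger_type in common.h
const completionParams = {
  grammar_lazy: true,
  grammar_triggers: [
    { type: GRAMMAR_TRIGGER_TYPE_WORD, value: '<tool_call>', token: 0 },
  ],
  // A word trigger that resolves to a single token must also be preserved,
  // or the native code above rejects it.
  preserved_tokens: ['<tool_call>'],
}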
package/package.json
CHANGED
package/src/NativeRNLlama.ts
CHANGED
@@ -93,8 +93,9 @@ export type NativeCompletionParams = {
    * Lazy grammar triggers. Default: []
    */
   grammar_triggers?: Array<{
-
-
+    type: number
+    value: string
+    token: number
   }>
   preserved_tokens?: Array<string>
   chat_format?: number
@@ -347,8 +348,9 @@ export type JinjaFormattedChatResult = {
   grammar?: string
   grammar_lazy?: boolean
   grammar_triggers?: Array<{
-
-
+    type: number
+    value: string
+    token: number
   }>
   preserved_tokens?: Array<string>
   additional_stops?: Array<string>
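Since JinjaFormattedChatResult now carries the same { type, value, token } trigger objects as NativeCompletionParams, a formatted-chat result can be forwarded to a completion call unchanged. A minimal sketch, where `formatted` stands in for a result obtained from the formatting call; only the field names come from the types in this diff.

// Sketch only: forwarding grammar fields from a Jinja-formatted chat result into completion params.
declare const formatted: {
  prompt: string
  grammar?: string
  grammar_lazy?: boolean
  grammar_triggers?: Array<{ type: number; value: string; token: number }>
  preserved_tokens?: Array<string>
}

const completionParams = {
  prompt: formatted.prompt,
  grammar: formatted.grammar,
  grammar_lazy: formatted.grammar_lazy,
  grammar_triggers: formatted.grammar_triggers,
  preserved_tokens: formatted.preserved_tokens,
}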
package/src/index.ts
CHANGED
@@ -70,7 +70,8 @@ if (EventEmitter) {
        logListeners.forEach((listener) => listener(evt.level, evt.text))
      },
    )
-
+  // Trigger unset to use default log callback
+  RNLlama?.toggleNativeLog?.(false)?.catch?.(() => {})
 }
 
 export type TokenData = {
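The added toggleNativeLog?.(false) call above drops native log forwarding back to the default callback once the JS listener is removed. A brief usage sketch, assuming the package keeps upstream llama.rn's toggleNativeLog/addNativeLogListener exports (not shown in this diff):

// Sketch only: subscribe to native logs, then let removal restore the default callback.
import { addNativeLogListener, toggleNativeLog } from 'cui-llama.rn'

toggleNativeLog(true).catch(() => {}) // start forwarding native logs to JS
const sub = addNativeLogListener((level, text) => {
  console.log(`[llama:${level}] ${text}`)
})

// ...later: removing the listener now also unsets the native callback
sub.remove()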
@@ -398,6 +399,7 @@ const modelInfoSkip = [
   'tokenizer.ggml.tokens',
   'tokenizer.ggml.token_type',
   'tokenizer.ggml.merges',
+  'tokenizer.ggml.scores',
 ]
 export async function loadLlamaModelInfo(model: string): Promise<Object> {
   let path = model
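With the change above, loadLlamaModelInfo also skips tokenizer.ggml.scores when reading GGUF metadata, alongside the other large tokenizer arrays in modelInfoSkip. A minimal usage sketch; the model path is a placeholder:

// Sketch only: metadata comes back with the large tokenizer arrays
// (tokens, token_type, merges, and now scores) omitted.
import { loadLlamaModelInfo } from 'cui-llama.rn'

const info = await loadLlamaModelInfo('/path/to/model.gguf')
console.log(info)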