cui-llama.rn 1.4.6 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. package/android/src/main/CMakeLists.txt +9 -2
  2. package/android/src/main/jni.cpp +52 -34
  3. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  4. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  9. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  10. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  11. package/cpp/binary-ops.cpp +158 -0
  12. package/cpp/binary-ops.h +16 -0
  13. package/cpp/chat.cpp +1769 -1779
  14. package/cpp/chat.h +9 -1
  15. package/cpp/common.cpp +20 -522
  16. package/cpp/common.h +13 -36
  17. package/cpp/cpu-common.h +72 -0
  18. package/cpp/ggml-common.h +12 -6
  19. package/cpp/ggml-cpu-aarch64.cpp +1557 -80
  20. package/cpp/ggml-cpu-impl.h +2 -21
  21. package/cpp/ggml-cpu-quants.c +904 -405
  22. package/cpp/ggml-cpu.c +909 -13237
  23. package/cpp/ggml-impl.h +50 -23
  24. package/cpp/ggml-metal-impl.h +77 -3
  25. package/cpp/ggml-metal.m +794 -580
  26. package/cpp/ggml.c +92 -3
  27. package/cpp/ggml.h +29 -5
  28. package/cpp/gguf.cpp +1 -0
  29. package/cpp/llama-adapter.cpp +55 -20
  30. package/cpp/llama-adapter.h +11 -9
  31. package/cpp/llama-arch.cpp +217 -16
  32. package/cpp/llama-arch.h +25 -0
  33. package/cpp/llama-batch.h +2 -2
  34. package/cpp/llama-chat.cpp +54 -2
  35. package/cpp/llama-chat.h +3 -0
  36. package/cpp/llama-context.cpp +2294 -1238
  37. package/cpp/llama-context.h +214 -77
  38. package/cpp/llama-cparams.h +1 -0
  39. package/cpp/llama-graph.cpp +1695 -0
  40. package/cpp/llama-graph.h +592 -0
  41. package/cpp/llama-hparams.cpp +8 -0
  42. package/cpp/llama-hparams.h +17 -0
  43. package/cpp/llama-io.cpp +15 -0
  44. package/cpp/llama-io.h +35 -0
  45. package/cpp/llama-kv-cache.cpp +965 -303
  46. package/cpp/llama-kv-cache.h +145 -151
  47. package/cpp/llama-memory.cpp +1 -0
  48. package/cpp/llama-memory.h +21 -0
  49. package/cpp/llama-mmap.cpp +1 -1
  50. package/cpp/llama-model-loader.cpp +10 -5
  51. package/cpp/llama-model-loader.h +5 -3
  52. package/cpp/llama-model.cpp +9194 -201
  53. package/cpp/llama-model.h +40 -1
  54. package/cpp/llama-sampling.cpp +5 -0
  55. package/cpp/llama-vocab.cpp +36 -5
  56. package/cpp/llama.cpp +51 -9984
  57. package/cpp/llama.h +102 -22
  58. package/cpp/log.cpp +34 -0
  59. package/cpp/minja/chat-template.hpp +15 -7
  60. package/cpp/minja/minja.hpp +120 -94
  61. package/cpp/ops.cpp +8723 -0
  62. package/cpp/ops.h +128 -0
  63. package/cpp/rn-llama.cpp +44 -53
  64. package/cpp/rn-llama.h +2 -12
  65. package/cpp/sampling.cpp +3 -0
  66. package/cpp/sgemm.cpp +533 -88
  67. package/cpp/simd-mappings.h +888 -0
  68. package/cpp/speculative.cpp +4 -4
  69. package/cpp/unary-ops.cpp +186 -0
  70. package/cpp/unary-ops.h +28 -0
  71. package/cpp/vec.cpp +258 -0
  72. package/cpp/vec.h +802 -0
  73. package/ios/CMakeLists.txt +5 -2
  74. package/ios/RNLlama.mm +2 -2
  75. package/ios/RNLlamaContext.mm +40 -24
  76. package/package.json +1 -1
  77. package/src/NativeRNLlama.ts +6 -4
  78. package/src/index.ts +3 -1
  79. package/cpp/chat-template.hpp +0 -529
  80. package/cpp/minja.hpp +0 -2915
@@ -62,15 +62,18 @@ add_library(rnllama SHARED
  ${SOURCE_DIR}/llama-model-loader.cpp
  ${SOURCE_DIR}/llama-mmap.cpp
  ${SOURCE_DIR}/llama-vocab.cpp
+ ${SOURCE_DIR}/llama-memory.cpp
+ ${SOURCE_DIR}/llama-io.cpp
+ ${SOURCE_DIR}/llama-graph.cpp
  ${SOURCE_DIR}/sampling.cpp
  ${SOURCE_DIR}/unicode-data.cpp
  ${SOURCE_DIR}/unicode.cpp
  ${SOURCE_DIR}/sgemm.cpp
  ${SOURCE_DIR}/common.cpp
  ${SOURCE_DIR}/chat.cpp
- ${SOURCE_DIR}/chat-template.hpp
  ${SOURCE_DIR}/json-schema-to-grammar.cpp
- ${SOURCE_DIR}/minja.hpp
+ ${SOURCE_DIR}/minja/minja.hpp
+ ${SOURCE_DIR}/minja/chat-template.hpp
  ${SOURCE_DIR}/json.hpp
  ${SOURCE_DIR}/amx/amx.cpp
  ${SOURCE_DIR}/amx/mmq.cpp
package/ios/RNLlama.mm CHANGED
@@ -102,8 +102,8 @@ RCT_EXPORT_METHOD(getFormattedChat:(double)contextId
  if ([params[@"jinja"] boolValue]) {
    NSString *jsonSchema = params[@"json_schema"];
    NSString *tools = params[@"tools"];
-   NSString *parallelToolCalls = params[@"parallel_tool_calls"];
-   NSString *toolChoice = params[@"tool_choice"];\
+   bool parallelToolCalls = [params[@"parallel_tool_calls"] boolValue];
+   NSString *toolChoice = params[@"tool_choice"];
    resolve([context getFormattedChatWithJinja:messages withChatTemplate:chatTemplate withJsonSchema:jsonSchema withTools:tools withParallelToolCalls:parallelToolCalls withToolChoice:toolChoice]);
  } else {
    resolve([context getFormattedChat:messages withChatTemplate:chatTemplate]);
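
The only functional change here is that parallel_tool_calls is now read with boolValue instead of being kept as an NSString. For reference, a minimal TypeScript sketch (not part of this diff) of the option object a JS caller would forward for a Jinja-formatted chat; the field names mirror the keys read above, everything else is an assumption:

    // Hypothetical option shape for a Jinja-formatted chat request; names mirror
    // the keys read in RNLlama.mm above, the type itself is only an illustration.
    type JinjaChatOptions = {
      jinja: boolean
      json_schema?: string
      tools?: string                 // JSON string; the native side reads an NSString
      parallel_tool_calls?: boolean  // now read with boolValue on the native side
      tool_choice?: string
    }

    const options: JinjaChatOptions = {
      jinja: true,
      tools: JSON.stringify([{ type: 'function', function: { name: 'get_weather', parameters: {} } }]),
      parallel_tool_calls: true,
      tool_choice: 'auto',
    }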
package/ios/RNLlamaContext.mm CHANGED
@@ -285,7 +285,7 @@
  [meta setValue:valStr forKey:keyStr];
  }

- auto template_tool_use = llama->templates.template_tool_use.get();
+ auto template_tool_use = llama->templates.get()->template_tool_use.get();
  NSDictionary *tool_use_caps_dir = nil;
  if (template_tool_use) {
    auto tool_use_caps = template_tool_use->original_caps();
@@ -299,7 +299,7 @@
  };
  }

- auto default_tmpl = llama->templates.template_default.get();
+ auto default_tmpl = llama->templates.get()->template_default.get();
  auto default_tmpl_caps = default_tmpl->original_caps();

  return @{
@@ -356,15 +356,16 @@
  parallelToolCalls,
  toolChoice == nil ? "" : [toolChoice UTF8String]
  );
- result[@"prompt"] = [NSString stringWithUTF8String:chatParams.prompt.get<std::string>().c_str()];
+ result[@"prompt"] = [NSString stringWithUTF8String:chatParams.prompt.c_str()];
  result[@"chat_format"] = @(static_cast<int>(chatParams.format));
  result[@"grammar"] = [NSString stringWithUTF8String:chatParams.grammar.c_str()];
  result[@"grammar_lazy"] = @(chatParams.grammar_lazy);
  NSMutableArray *grammar_triggers = [[NSMutableArray alloc] init];
  for (const auto & trigger : chatParams.grammar_triggers) {
    [grammar_triggers addObject:@{
-     @"word": [NSString stringWithUTF8String:trigger.word.c_str()],
-     @"at_start": @(trigger.at_start),
+     @"type": @(trigger.type),
+     @"value": [NSString stringWithUTF8String:trigger.value.c_str()],
+     @"token": @(trigger.token),
    }];
  }
  result[@"grammar_triggers"] = grammar_triggers;
@@ -483,25 +484,6 @@
  sparams.grammar_lazy = [params[@"grammar_lazy"] boolValue];
  }

- if (params[@"grammar_triggers"] && [params[@"grammar_triggers"] isKindOfClass:[NSArray class]]) {
-   NSArray *grammar_triggers = params[@"grammar_triggers"];
-   for (NSDictionary *grammar_trigger in grammar_triggers) {
-     common_grammar_trigger trigger;
-     trigger.word = [grammar_trigger[@"word"] UTF8String];
-     trigger.at_start = [grammar_trigger[@"at_start"] boolValue];
-
-     auto ids = common_tokenize(llama->ctx, trigger.word, /* add_special= */ false, /* parse_special= */ true);
-     if (ids.size() == 1) {
-       // LOG_DBG("Grammar trigger token: %d (`%s`)\n", ids[0], trigger.word.c_str());
-       sparams.grammar_trigger_tokens.push_back(ids[0]);
-       sparams.preserved_tokens.insert(ids[0]);
-       continue;
-     }
-     // LOG_DBG("Grammar trigger word: `%s`\n", trigger.word.c_str());
-     sparams.grammar_trigger_words.push_back(trigger);
-   }
- }
-
  if (params[@"preserved_tokens"] && [params[@"preserved_tokens"] isKindOfClass:[NSArray class]]) {
    NSArray *preserved_tokens = params[@"preserved_tokens"];
    for (NSString *token in preserved_tokens) {
@@ -514,6 +496,40 @@
  }
  }

+ if (params[@"grammar_triggers"] && [params[@"grammar_triggers"] isKindOfClass:[NSArray class]]) {
+   NSArray *grammar_triggers = params[@"grammar_triggers"];
+   for (NSDictionary *grammar_trigger in grammar_triggers) {
+     const auto type = static_cast<common_grammar_trigger_type>([grammar_trigger[@"type"] intValue]);
+     const auto & word = [grammar_trigger[@"value"] UTF8String];
+
+     if (type == COMMON_GRAMMAR_TRIGGER_TYPE_WORD) {
+       auto ids = common_tokenize(llama->ctx, word, /* add_special= */ false, /* parse_special= */ true);
+       if (ids.size() == 1) {
+         auto token = ids[0];
+         if (std::find(sparams.preserved_tokens.begin(), sparams.preserved_tokens.end(), (llama_token) token) == sparams.preserved_tokens.end()) {
+           throw std::runtime_error("Grammar trigger word should be marked as preserved token");
+         }
+         common_grammar_trigger trigger;
+         trigger.type = COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN;
+         trigger.value = word;
+         trigger.token = token;
+         sparams.grammar_triggers.push_back(std::move(trigger));
+       } else {
+         sparams.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, word});
+       }
+     } else {
+       common_grammar_trigger trigger;
+       trigger.type = type;
+       trigger.value = word;
+       if (type == COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN) {
+         const auto token = (llama_token) [grammar_trigger[@"token"] intValue];
+         trigger.token = token;
+       }
+       sparams.grammar_triggers.push_back(std::move(trigger));
+     }
+   }
+ }
+
  llama->params.antiprompt.clear();
  if (params[@"stop"]) {
    NSArray *stop = params[@"stop"];
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "cui-llama.rn",
-   "version": "1.4.6",
+   "version": "1.5.0",
    "description": "Fork of llama.rn for ChatterUI",
    "main": "lib/commonjs/index",
    "module": "lib/module/index",
package/src/NativeRNLlama.ts CHANGED
@@ -93,8 +93,9 @@ export type NativeCompletionParams = {
   * Lazy grammar triggers. Default: []
   */
  grammar_triggers?: Array<{
-   at_start: boolean
-   word: string
+   type: number
+   value: string
+   token: number
  }>
  preserved_tokens?: Array<string>
  chat_format?: number
@@ -347,8 +348,9 @@ export type JinjaFormattedChatResult = {
  grammar?: string
  grammar_lazy?: boolean
  grammar_triggers?: Array<{
-   at_start: boolean
-   word: string
+   type: number
+   value: string
+   token: number
  }>
  preserved_tokens?: Array<string>
  additional_stops?: Array<string>
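
Both bridge types now carry the same {type, value, token} trigger objects, so a Jinja-formatted chat result can be forwarded to completion unchanged. A sketch, assuming the public wrapper keeps upstream llama.rn's initLlama / getFormattedChat / completion methods (the wrapper signatures are assumptions; the forwarded field names come from the types above):

    import { initLlama } from 'cui-llama.rn'

    // Sketch: forward the grammar-related fields of a Jinja-formatted chat
    // straight into the completion call, new trigger objects included.
    async function completeWithTools(modelPath: string, tools: string) {
      const context = await initLlama({ model: modelPath })
      const chat = await context.getFormattedChat(
        [{ role: 'user', content: 'Call a tool if needed.' }],
        null,
        { jinja: true, tools },
      )
      return context.completion({
        prompt: chat.prompt,
        chat_format: chat.chat_format,
        grammar: chat.grammar,
        grammar_lazy: chat.grammar_lazy,
        grammar_triggers: chat.grammar_triggers,   // Array<{ type, value, token }>
        preserved_tokens: chat.preserved_tokens,
        stop: chat.additional_stops,
      })
    }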
package/src/index.ts CHANGED
@@ -70,7 +70,8 @@ if (EventEmitter) {
      logListeners.forEach((listener) => listener(evt.level, evt.text))
    },
  )
- RNLlama?.toggleNativeLog?.(false) // Trigger unset to use default log callback
+ // Trigger unset to use default log callback
+ RNLlama?.toggleNativeLog?.(false)?.catch?.(() => {})
  }

  export type TokenData = {
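
The only change is that the initial unset call no longer leaves an unhandled promise rejection when the native module is unavailable. For completeness, a sketch of how logging is consumed from JS, assuming the package keeps upstream llama.rn's toggleNativeLog / addNativeLogListener exports (those names are an assumption; only toggleNativeLog appears in this diff):

    // Assumed API, mirroring upstream llama.rn; not shown in this diff.
    import { addNativeLogListener, toggleNativeLog } from 'cui-llama.rn'

    toggleNativeLog(true).catch(() => {}) // route llama.cpp logs to JS listeners
    const subscription = addNativeLogListener((level, text) => {
      console.log(`[llama] ${level}: ${text}`)
    })
    // later, to stop receiving logs:
    // subscription.remove()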
@@ -398,6 +399,7 @@ const modelInfoSkip = [
  'tokenizer.ggml.tokens',
  'tokenizer.ggml.token_type',
  'tokenizer.ggml.merges',
+ 'tokenizer.ggml.scores'
  ]
  export async function loadLlamaModelInfo(model: string): Promise<Object> {
    let path = model
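
loadLlamaModelInfo (exported just below the skip list) returns a model file's GGUF metadata with the bulky tokenizer arrays filtered out by modelInfoSkip, which now also drops 'tokenizer.ggml.scores'. A short usage sketch; the metadata keys shown are standard GGUF keys, not something this diff guarantees:

    import { loadLlamaModelInfo } from 'cui-llama.rn'

    async function printModelArch(path: string) {
      // Large tokenizer arrays (tokens, merges, scores, ...) are omitted from the result.
      const info = (await loadLlamaModelInfo(path)) as Record<string, unknown>
      console.log(info['general.architecture'], info['general.name'])
    }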