cui-llama.rn 1.2.6 → 1.3.0

Files changed (70)
  1. package/README.md +3 -2
  2. package/android/src/main/CMakeLists.txt +20 -5
  3. package/android/src/main/java/com/rnllama/LlamaContext.java +115 -27
  4. package/android/src/main/java/com/rnllama/RNLlama.java +40 -7
  5. package/android/src/main/jni.cpp +222 -34
  6. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +9 -4
  7. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +9 -4
  8. package/cpp/common.cpp +1682 -2114
  9. package/cpp/common.h +600 -613
  10. package/cpp/ggml-aarch64.c +129 -3478
  11. package/cpp/ggml-aarch64.h +19 -39
  12. package/cpp/ggml-alloc.c +1040 -1040
  13. package/cpp/ggml-alloc.h +76 -76
  14. package/cpp/ggml-backend-impl.h +216 -216
  15. package/cpp/ggml-backend-reg.cpp +195 -0
  16. package/cpp/ggml-backend.cpp +1997 -2661
  17. package/cpp/ggml-backend.h +328 -314
  18. package/cpp/ggml-common.h +1853 -1853
  19. package/cpp/ggml-cpp.h +38 -38
  20. package/cpp/ggml-cpu-aarch64.c +3560 -0
  21. package/cpp/ggml-cpu-aarch64.h +30 -0
  22. package/cpp/ggml-cpu-impl.h +371 -614
  23. package/cpp/ggml-cpu-quants.c +10822 -0
  24. package/cpp/ggml-cpu-quants.h +63 -0
  25. package/cpp/ggml-cpu.c +13975 -13720
  26. package/cpp/ggml-cpu.cpp +663 -0
  27. package/cpp/ggml-cpu.h +177 -150
  28. package/cpp/ggml-impl.h +550 -296
  29. package/cpp/ggml-metal.h +66 -66
  30. package/cpp/ggml-metal.m +4294 -3933
  31. package/cpp/ggml-quants.c +5247 -15739
  32. package/cpp/ggml-quants.h +100 -147
  33. package/cpp/ggml-threading.cpp +12 -0
  34. package/cpp/ggml-threading.h +12 -0
  35. package/cpp/ggml.c +8180 -8390
  36. package/cpp/ggml.h +2411 -2441
  37. package/cpp/llama-grammar.cpp +1138 -1138
  38. package/cpp/llama-grammar.h +144 -144
  39. package/cpp/llama-impl.h +181 -181
  40. package/cpp/llama-sampling.cpp +2348 -2345
  41. package/cpp/llama-sampling.h +48 -48
  42. package/cpp/llama-vocab.cpp +1984 -1984
  43. package/cpp/llama-vocab.h +170 -170
  44. package/cpp/llama.cpp +22132 -22046
  45. package/cpp/llama.h +1253 -1255
  46. package/cpp/log.cpp +401 -401
  47. package/cpp/log.h +121 -121
  48. package/cpp/rn-llama.hpp +83 -19
  49. package/cpp/sampling.cpp +466 -466
  50. package/cpp/sgemm.cpp +1884 -1276
  51. package/ios/RNLlama.mm +43 -20
  52. package/ios/RNLlamaContext.h +9 -3
  53. package/ios/RNLlamaContext.mm +133 -33
  54. package/jest/mock.js +0 -1
  55. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  56. package/lib/commonjs/index.js +52 -15
  57. package/lib/commonjs/index.js.map +1 -1
  58. package/lib/module/NativeRNLlama.js.map +1 -1
  59. package/lib/module/index.js +51 -15
  60. package/lib/module/index.js.map +1 -1
  61. package/lib/typescript/NativeRNLlama.d.ts +29 -5
  62. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  63. package/lib/typescript/index.d.ts +12 -5
  64. package/lib/typescript/index.d.ts.map +1 -1
  65. package/package.json +1 -1
  66. package/src/NativeRNLlama.ts +41 -6
  67. package/src/index.ts +82 -27
  68. package/cpp/json-schema-to-grammar.cpp +0 -1045
  69. package/cpp/json-schema-to-grammar.h +0 -8
  70. package/cpp/json.hpp +0 -24766
package/ios/RNLlama.mm CHANGED
@@ -21,10 +21,25 @@ RCT_EXPORT_METHOD(setContextLimit:(double)limit
   resolve(nil);
 }
 
-RCT_EXPORT_METHOD(initContext:(NSDictionary *)contextParams
+RCT_EXPORT_METHOD(modelInfo:(NSString *)path
+                 withSkip:(NSArray *)skip
                  withResolver:(RCTPromiseResolveBlock)resolve
                  withRejecter:(RCTPromiseRejectBlock)reject)
 {
+  resolve([RNLlamaContext modelInfo:path skip:skip]);
+}
+
+RCT_EXPORT_METHOD(initContext:(double)contextId
+                 withContextParams:(NSDictionary *)contextParams
+                 withResolver:(RCTPromiseResolveBlock)resolve
+                 withRejecter:(RCTPromiseRejectBlock)reject)
+{
+  NSNumber *contextIdNumber = [NSNumber numberWithDouble:contextId];
+  if (llamaContexts[contextIdNumber] != nil) {
+    reject(@"llama_error", @"Context already exists", nil);
+    return;
+  }
+
   if (llamaDQueue == nil) {
     llamaDQueue = dispatch_queue_create("com.rnllama", DISPATCH_QUEUE_SERIAL);
   }
@@ -38,23 +53,27 @@ RCT_EXPORT_METHOD(initContext:(NSDictionary *)contextParams
     return;
   }
 
-  RNLlamaContext *context = [RNLlamaContext initWithParams:contextParams];
-  if (![context isModelLoaded]) {
-    reject(@"llama_cpp_error", @"Failed to load the model", nil);
-    return;
+  @try {
+    RNLlamaContext *context = [RNLlamaContext initWithParams:contextParams onProgress:^(unsigned int progress) {
+      dispatch_async(dispatch_get_main_queue(), ^{
+        [self sendEventWithName:@"@RNLlama_onInitContextProgress" body:@{ @"contextId": @(contextId), @"progress": @(progress) }];
+      });
+    }];
+    if (![context isModelLoaded]) {
+      reject(@"llama_cpp_error", @"Failed to load the model", nil);
+      return;
+    }
+
+    [llamaContexts setObject:context forKey:contextIdNumber];
+
+    resolve(@{
+      @"gpu": @([context isMetalEnabled]),
+      @"reasonNoGPU": [context reasonNoMetal],
+      @"model": [context modelInfo],
+    });
+  } @catch (NSException *exception) {
+    reject(@"llama_cpp_error", exception.reason, nil);
   }
-
-  double contextId = (double) arc4random_uniform(1000000);
-
-  NSNumber *contextIdNumber = [NSNumber numberWithDouble:contextId];
-  [llamaContexts setObject:context forKey:contextIdNumber];
-
-  resolve(@{
-    @"contextId": contextIdNumber,
-    @"gpu": @([context isMetalEnabled]),
-    @"reasonNoGPU": [context reasonNoMetal],
-    @"model": [context modelInfo],
-  });
 }
 
 RCT_EXPORT_METHOD(getFormattedChat:(double)contextId
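
The context id is now chosen on the JavaScript side and passed into `initContext`, with model-load progress streamed back through the new `@RNLlama_onInitContextProgress` event keyed by that id. A minimal sketch of the JS-side call this enables (model path hypothetical):

```ts
import { initLlama } from 'cui-llama.rn'

// initLlama picks the context id itself and subscribes to the progress
// event before invoking the native initContext(contextId, params).
const context = await initLlama(
  { model: '/path/to/model.gguf', n_ctx: 2048 },
  (progress) => console.log(`model load: ${progress}%`), // 0-100
)
```
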
@@ -125,6 +144,7 @@ RCT_EXPORT_METHOD(saveSession:(double)contextId
 
 - (NSArray *)supportedEvents {
   return@[
+    @"@RNLlama_onInitContextProgress",
     @"@RNLlama_onToken",
   ];
 }
@@ -213,6 +233,7 @@ RCT_EXPORT_METHOD(detokenize:(double)contextId
 
 RCT_EXPORT_METHOD(embedding:(double)contextId
                  text:(NSString *)text
+                 params:(NSDictionary *)params
                  withResolver:(RCTPromiseResolveBlock)resolve
                  withRejecter:(RCTPromiseRejectBlock)reject)
 {
@@ -222,9 +243,8 @@ RCT_EXPORT_METHOD(embedding:(double)contextId
     return;
   }
   @try {
-    NSMutableArray *embedding = [context embedding:text];
-    resolve(@{ @"embedding": embedding });
-    [embedding release];
+    NSDictionary *embedding = [context embedding:text params:params];
+    resolve(embedding);
   } @catch (NSException *exception) {
     reject(@"llama_cpp_error", exception.reason, nil);
   }
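
`embedding` now resolves a dictionary rather than a bare array, which lets the native side return the prompt token pieces next to the vector and accept per-call options. A sketch of the new call shape; the meaning of `embd_normalize: 2` (Euclidean norm) is an assumption based on llama.cpp's convention:

```ts
// The result is now an object, not a number[].
const { embedding, prompt_tokens } = await context.embedding('hello world', {
  embd_normalize: 2, // per-call override of the context-level normalization
})
console.log(embedding.length, prompt_tokens)
```
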
@@ -260,6 +280,9 @@ RCT_EXPORT_METHOD(releaseContext:(double)contextId
     reject(@"llama_error", @"Context not found", nil);
     return;
   }
+  if (![context isModelLoaded]) {
+    [context interruptLoad];
+  }
   [context stopCompletion];
   dispatch_barrier_sync(llamaDQueue, ^{});
   [context invalidate];
package/ios/RNLlamaContext.h CHANGED
@@ -1,18 +1,24 @@
 #ifdef __cplusplus
 #import "llama.h"
+#import "llama-impl.h"
+#import "ggml.h"
 #import "rn-llama.hpp"
 #endif
 
 
 @interface RNLlamaContext : NSObject {
   bool is_metal_enabled;
-  NSString * reason_no_metal;
   bool is_model_loaded;
+  NSString * reason_no_metal;
+
+  void (^onProgress)(unsigned int progress);
 
   rnllama::llama_rn_context * llama;
 }
 
-+ (instancetype)initWithParams:(NSDictionary *)params;
++ (NSDictionary *)modelInfo:(NSString *)path skip:(NSArray *)skip;
++ (instancetype)initWithParams:(NSDictionary *)params onProgress:(void (^)(unsigned int progress))onProgress;
+- (void)interruptLoad;
 - (bool)isMetalEnabled;
 - (NSString *)reasonNoMetal;
 - (NSDictionary *)modelInfo;
@@ -22,7 +28,7 @@
 - (void)stopCompletion;
 - (NSArray *)tokenize:(NSString *)text;
 - (NSString *)detokenize:(NSArray *)tokens;
-- (NSArray *)embedding:(NSString *)text;
+- (NSDictionary *)embedding:(NSString *)text params:(NSDictionary *)params;
 - (NSString *)getFormattedChat:(NSArray *)messages withTemplate:(NSString *)chatTemplate;
 - (NSDictionary *)loadSession:(NSString *)path;
 - (int)saveSession:(NSString *)path size:(int)size;
package/ios/RNLlamaContext.mm CHANGED
@@ -3,9 +3,53 @@
 
 @implementation RNLlamaContext
 
-+ (instancetype)initWithParams:(NSDictionary *)params {
++ (NSDictionary *)modelInfo:(NSString *)path skip:(NSArray *)skip {
+  struct lm_gguf_init_params params = {
+    /*.no_alloc = */ false,
+    /*.ctx = */ NULL,
+  };
+
+  struct lm_gguf_context * ctx = lm_gguf_init_from_file([path UTF8String], params);
+
+  if (!ctx) {
+    NSLog(@"%s: failed to load '%s'\n", __func__, [path UTF8String]);
+    return @{};
+  }
+
+  NSMutableDictionary *info = [[NSMutableDictionary alloc] init];
+
+  info[@"version"] = @(lm_gguf_get_version(ctx));
+  info[@"alignment"] = @(lm_gguf_get_alignment(ctx));
+  info[@"data_offset"] = @(lm_gguf_get_data_offset(ctx));
+
+  // kv
+  {
+    const int n_kv = lm_gguf_get_n_kv(ctx);
+
+    for (int i = 0; i < n_kv; ++i) {
+      const char * key = lm_gguf_get_key(ctx, i);
+
+      if (skip && [skip containsObject:[NSString stringWithUTF8String:key]]) {
+        continue;
+      }
+      const std::string value = rnllama::lm_gguf_kv_to_str(ctx, i);
+      info[[NSString stringWithUTF8String:key]] = [NSString stringWithUTF8String:value.c_str()];
+    }
+  }
+
+  lm_gguf_free(ctx);
+
+  return info;
+}
+
++ (instancetype)initWithParams:(NSDictionary *)params onProgress:(void (^)(unsigned int progress))onProgress {
   // llama_backend_init(false);
-  gpt_params defaultParams;
+  common_params defaultParams;
+
+  if (params[@"vocab_only"]) {
+    defaultParams.vocab_only = [params[@"vocab_only"] boolValue];
+    defaultParams.warmup = false;
+  }
 
   NSString *modelPath = params[@"model"];
   BOOL isAsset = [params[@"is_model_asset"] boolValue];
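
The new `modelInfo` class method opens the GGUF file directly through llama.cpp's gguf API (`lm_`-prefixed in this package) and returns the header fields plus every metadata key/value pair, minus the keys listed in `skip`. The JS layer exposes it as `loadLlamaModelInfo`; a sketch, with a hypothetical path and metadata keys that depend on the model file:

```ts
import { loadLlamaModelInfo } from 'cui-llama.rn'

// Reads GGUF metadata without allocating a full llama context.
const info = await loadLlamaModelInfo('/path/to/model.gguf')
console.log(info.version, info['general.architecture'])
```
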
@@ -13,10 +57,6 @@
   if (isAsset) path = [[NSBundle mainBundle] pathForResource:modelPath ofType:nil];
   defaultParams.model = [path UTF8String];
 
-  if (params[@"embedding"] && [params[@"embedding"] boolValue]) {
-    defaultParams.embedding = true;
-  }
-
   if (params[@"n_ctx"]) defaultParams.n_ctx = [params[@"n_ctx"] intValue];
   if (params[@"use_mlock"]) defaultParams.use_mlock = [params[@"use_mlock"]boolValue];
 
@@ -56,35 +96,79 @@
   if (params[@"n_batch"]) defaultParams.n_batch = [params[@"n_batch"] intValue];
   if (params[@"use_mmap"]) defaultParams.use_mmap = [params[@"use_mmap"] boolValue];
 
+  if (params[@"pooling_type"] && [params[@"pooling_type"] isKindOfClass:[NSNumber class]]) {
+    defaultParams.pooling_type = static_cast<enum llama_pooling_type>([params[@"pooling_type"] intValue]);
+  }
+
+  if (params[@"embedding"] && [params[@"embedding"] boolValue]) {
+    defaultParams.embedding = true;
+    // For non-causal models, batch size must be equal to ubatch size
+    defaultParams.n_ubatch = defaultParams.n_batch;
+
+    if (params[@"embd_normalize"] && [params[@"embd_normalize"] isKindOfClass:[NSNumber class]]) {
+      defaultParams.embd_normalize = [params[@"embd_normalize"] intValue];
+    }
+  }
+
   if (params[@"lora"]) {
     float lora_scaled = 1.0f;
     if (params[@"lora_scaled"]) lora_scaled = [params[@"lora_scaled"] floatValue];
-    defaultParams.lora_adapter.push_back({[params[@"lora"] UTF8String], lora_scaled});
-    defaultParams.use_mmap = false;
+    defaultParams.lora_adapters.push_back({[params[@"lora"] UTF8String], lora_scaled});
   }
 
   if (params[@"rope_freq_base"]) defaultParams.rope_freq_base = [params[@"rope_freq_base"] floatValue];
   if (params[@"rope_freq_scale"]) defaultParams.rope_freq_scale = [params[@"rope_freq_scale"] floatValue];
 
-  if (params[@"seed"]) defaultParams.seed = [params[@"seed"] intValue];
+  if (params[@"flash_attn"] && [params[@"flash_attn"] boolValue]) defaultParams.flash_attn = true;
+
+  if (params[@"cache_type_k"]) defaultParams.cache_type_k = [params[@"cache_type_k"] UTF8String];
+  if (params[@"cache_type_v"]) defaultParams.cache_type_v = [params[@"cache_type_v"] UTF8String];
 
   int nThreads = params[@"n_threads"] ? [params[@"n_threads"] intValue] : 0;
   const int maxThreads = (int) [[NSProcessInfo processInfo] processorCount];
   // Use 2 threads by default on 4-core devices, 4 threads on more cores
   const int defaultNThreads = nThreads == 4 ? 2 : MIN(4, maxThreads);
-  defaultParams.n_threads = nThreads > 0 ? nThreads : defaultNThreads;
+  defaultParams.cpuparams.n_threads = nThreads > 0 ? nThreads : defaultNThreads;
 
   RNLlamaContext *context = [[RNLlamaContext alloc] init];
-  if (context->llama == nullptr) {
-    context->llama = new rnllama::llama_rn_context();
+  context->llama = new rnllama::llama_rn_context();
+  context->llama->is_load_interrupted = false;
+  context->llama->loading_progress = 0;
+  context->onProgress = onProgress;
+
+  if (params[@"use_progress_callback"] && [params[@"use_progress_callback"] boolValue]) {
+    defaultParams.progress_callback = [](float progress, void * user_data) {
+      RNLlamaContext *context = (__bridge RNLlamaContext *)(user_data);
+      unsigned percentage = (unsigned) (100 * progress);
+      if (percentage > context->llama->loading_progress) {
+        context->llama->loading_progress = percentage;
+        context->onProgress(percentage);
+      }
+      return !context->llama->is_load_interrupted;
+    };
+    defaultParams.progress_callback_user_data = context;
   }
+
   context->is_model_loaded = context->llama->loadModel(defaultParams);
+
+  if (
+    params[@"embedding"] && [params[@"embedding"] boolValue] &&
+    llama_model_has_encoder(context->llama->model) && llama_model_has_decoder(context->llama->model)
+  ) {
+    delete context->llama;
+    @throw [NSException exceptionWithName:@"LlamaException" reason:@"Embedding is not supported in encoder-decoder models" userInfo:nil];
+  }
+
   context->is_metal_enabled = isMetalEnabled;
   context->reason_no_metal = reasonNoMetal;
 
   return context;
 }
 
+- (void)interruptLoad {
+  llama->is_load_interrupted = true;
+}
+
 - (bool)isMetalEnabled {
   return is_metal_enabled;
 }
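
Taken together, the new init options map directly onto llama.cpp's `common_params`: `pooling_type` and `embd_normalize` configure embeddings (with `n_ubatch` pinned to `n_batch`, as non-causal models require), `flash_attn` toggles flash attention, and `cache_type_k`/`cache_type_v` select the KV-cache quantization types. A sketch of an embedding-oriented init under those assumptions (path hypothetical):

```ts
const ctx = await initLlama({
  model: '/path/to/embedding-model.gguf',
  embedding: true,
  pooling_type: 'mean', // mapped to the llama_pooling_type enum on the native side
  embd_normalize: 2,    // context-wide default normalization
  flash_attn: true,
  cache_type_k: 'q8_0', // quantized K cache
  cache_type_v: 'q8_0', // quantized V cache (llama.cpp requires flash_attn for this)
})
```
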
@@ -128,7 +212,7 @@
 }
 
 - (NSString *)getFormattedChat:(NSArray *)messages withTemplate:(NSString *)chatTemplate {
-  std::vector<llama_chat_msg> chat;
+  std::vector<common_chat_msg> chat;
 
   for (NSDictionary *msg in messages) {
     std::string role = [[msg objectForKey:@"role"] UTF8String];
@@ -137,7 +221,7 @@
   }
 
   auto tmpl = chatTemplate == nil ? "" : [chatTemplate UTF8String];
-  auto formatted_chat = llama_chat_apply_template(llama->model, tmpl, chat, true);
+  auto formatted_chat = common_chat_apply_template(llama->model, tmpl, chat, true);
   return [NSString stringWithUTF8String:formatted_chat.c_str()];
 }
 
@@ -168,21 +252,22 @@
 {
   llama->rewind();
 
-  llama_reset_timings(llama->ctx);
+  //llama_reset_timings(llama->ctx);
 
   NSString *prompt = [params objectForKey:@"prompt"];
 
   llama->params.prompt = [prompt UTF8String];
-  llama->params.seed = params[@"seed"] ? [params[@"seed"] intValue] : -1;
+  llama->params.sparams.seed = params[@"seed"] ? [params[@"seed"] intValue] : -1;
 
   if (params[@"n_threads"]) {
-    int nThreads = params[@"n_threads"] ? [params[@"n_threads"] intValue] : llama->params.n_threads;
+    int nThreads = params[@"n_threads"] ? [params[@"n_threads"] intValue] : llama->params.cpuparams.n_threads;
     const int maxThreads = (int) [[NSProcessInfo processInfo] processorCount];
     // Use 2 threads by default on 4-core devices, 4 threads on more cores
     const int defaultNThreads = nThreads == 4 ? 2 : MIN(4, maxThreads);
-    llama->params.n_threads = nThreads > 0 ? nThreads : defaultNThreads;
+    llama->params.cpuparams.n_threads = nThreads > 0 ? nThreads : defaultNThreads;
   }
   if (params[@"n_predict"]) llama->params.n_predict = [params[@"n_predict"] intValue];
+  if (params[@"ignore_eos"]) llama->params.sparams.ignore_eos = [params[@"ignore_eos"] boolValue];
 
   auto & sparams = llama->params.sparams;
 
@@ -203,9 +288,9 @@
   if (params[@"top_k"]) sparams.top_k = [params[@"top_k"] intValue];
   if (params[@"top_p"]) sparams.top_p = [params[@"top_p"] doubleValue];
   if (params[@"min_p"]) sparams.min_p = [params[@"min_p"] doubleValue];
-  if (params[@"tfs_z"]) sparams.tfs_z = [params[@"tfs_z"] doubleValue];
-
-  if (params[@"typical_p"]) sparams.typical_p = [params[@"typical_p"] doubleValue];
+  if (params[@"xtc_threshold"]) sparams.xtc_threshold = [params[@"xtc_threshold"] doubleValue];
+  if (params[@"xtc_probability"]) sparams.xtc_probability = [params[@"xtc_probability"] doubleValue];
+  if (params[@"typical_p"]) sparams.typ_p = [params[@"typical_p"] doubleValue];
 
   if (params[@"grammar"]) {
     sparams.grammar = [params[@"grammar"] UTF8String];
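
On the sampling side, `seed` and `ignore_eos` move under the sampler params, tail-free sampling (`tfs_z`) is dropped, and XTC sampling replaces it, while `typical_p` survives on top of the renamed `typ_p` field. A sketch of a completion call using the new knobs (values illustrative):

```ts
const result = await context.completion({
  prompt: 'Once upon a time',
  n_predict: 64,
  seed: 42,             // now routed into the native sampling params
  xtc_probability: 0.5, // chance of applying XTC at each sampling step
  xtc_threshold: 0.1,   // candidates above this probability may be excluded
  typical_p: 1.0,       // same JS name as before; tfs_z is no longer accepted
})
```
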
@@ -221,7 +306,7 @@
 
   sparams.logit_bias.clear();
   if (params[@"ignore_eos"] && [params[@"ignore_eos"] boolValue]) {
-    sparams.logit_bias[llama_token_eos(llama->model)] = -INFINITY;
+    sparams.logit_bias[llama_token_eos(llama->model)].bias = -INFINITY;
   }
 
   if (params[@"logit_bias"] && [params[@"logit_bias"] isKindOfClass:[NSArray class]]) {
@@ -232,9 +317,9 @@
       llama_token tok = [el[0] intValue];
       if (tok >= 0 && tok < n_vocab) {
         if ([el[1] isKindOfClass:[NSNumber class]]) {
-          sparams.logit_bias[tok] = [el[1] doubleValue];
+          sparams.logit_bias[tok].bias = [el[1] doubleValue];
         } else if ([el[1] isKindOfClass:[NSNumber class]] && ![el[1] boolValue]) {
-          sparams.logit_bias[tok] = -INFINITY;
+          sparams.logit_bias[tok].bias = -INFINITY;
         }
       }
     }
@@ -255,7 +340,7 @@
     if (token_with_probs.tok == -1 || llama->incomplete) {
       continue;
     }
-    const std::string token_text = llama_token_to_piece(llama->ctx, token_with_probs.tok);
+    const std::string token_text = common_token_to_piece(llama->ctx, token_with_probs.tok);
 
     size_t pos = std::min(sent_count, llama->generated_text.size());
 
@@ -290,7 +375,7 @@
       tokenResult[@"token"] = [NSString stringWithUTF8String:to_send.c_str()];
 
       if (llama->params.sparams.n_probs > 0) {
-        const std::vector<llama_token> to_send_toks = llama_tokenize(llama->ctx, to_send, false);
+        const std::vector<llama_token> to_send_toks = common_tokenize(llama->ctx, to_send, false);
         size_t probs_pos = std::min(sent_token_probs_index, llama->generated_token_probs.size());
         size_t probs_stop_pos = std::min(sent_token_probs_index + to_send_toks.size(), llama->generated_token_probs.size());
         if (probs_pos < probs_stop_pos) {
@@ -305,10 +390,10 @@
     }
   }
 
-  llama_print_timings(llama->ctx);
+  llama_perf_context_print(llama->ctx);
   llama->is_predicting = false;
 
-  const auto timings = llama_get_timings(llama->ctx);
+  const auto timings = llama_perf_context(llama->ctx);
   return @{
     @"text": [NSString stringWithUTF8String:llama->generated_text.c_str()],
     @"completion_probabilities": [self tokenProbsToDict:llama->generated_token_probs],
@@ -339,7 +424,7 @@
 }
 
 - (NSArray *)tokenize:(NSString *)text {
-  const std::vector<llama_token> toks = llama_tokenize(llama->ctx, [text UTF8String], false);
+  const std::vector<llama_token> toks = common_tokenize(llama->ctx, [text UTF8String], false);
   NSMutableArray *result = [[NSMutableArray alloc] init];
   for (llama_token tok : toks) {
     [result addObject:@(tok)];
@@ -356,14 +441,22 @@
   return [NSString stringWithUTF8String:text.c_str()];
 }
 
-- (NSArray *)embedding:(NSString *)text {
+- (NSDictionary *)embedding:(NSString *)text params:(NSDictionary *)params {
   if (llama->params.embedding != true) {
     @throw [NSException exceptionWithName:@"LlamaException" reason:@"Embedding is not enabled" userInfo:nil];
   }
 
+  common_params embdParams;
+  embdParams.embedding = true;
+  embdParams.embd_normalize = llama->params.embd_normalize;
+
+  if (params[@"embd_normalize"] && [params[@"embd_normalize"] isKindOfClass:[NSNumber class]]) {
+    embdParams.embd_normalize = [params[@"embd_normalize"] intValue];
+  }
+
   llama->rewind();
 
-  llama_reset_timings(llama->ctx);
+  llama_perf_context_reset(llama->ctx);
 
   llama->params.prompt = [text UTF8String];
 
@@ -376,15 +469,22 @@
   llama->loadPrompt();
   llama->doCompletion();
 
-  std::vector<float> result = llama->getEmbedding();
+  std::vector<float> result = llama->getEmbedding(embdParams);
 
+  NSMutableDictionary *resultDict = [[NSMutableDictionary alloc] init];
   NSMutableArray *embeddingResult = [[NSMutableArray alloc] init];
   for (float f : result) {
     [embeddingResult addObject:@(f)];
   }
+  resultDict[@"embedding"] = embeddingResult;
+  NSMutableArray *promptTokens = [[NSMutableArray alloc] init];
+  for (llama_token tok : llama->embd) {
+    [promptTokens addObject:[NSString stringWithUTF8String:common_token_to_piece(llama->ctx, tok).c_str()]];
+  }
+  resultDict[@"prompt_tokens"] = promptTokens;
 
   llama->is_predicting = false;
-  return embeddingResult;
+  return resultDict;
 }
 
 - (NSDictionary *)loadSession:(NSString *)path {
package/jest/mock.js CHANGED
@@ -4,7 +4,6 @@ if (!NativeModules.RNLlama) {
   NativeModules.RNLlama = {
     initContext: jest.fn(() =>
       Promise.resolve({
-        contextId: 1,
        gpu: false,
        reasonNoGPU: 'Test',
      }),
package/lib/commonjs/NativeRNLlama.js.map CHANGED
@@ -1 +1 @@
-{"version":3,"names":["_reactNative","require","_default","TurboModuleRegistry","get","exports","default"],"sourceRoot":"..\\..\\src","sources":["NativeRNLlama.ts"],"mappings":";;;;;;AACA,IAAAA,YAAA,GAAAC,OAAA;AAAkD,IAAAC,QAAA,GAqKnCC,gCAAmB,CAACC,GAAG,CAAO,SAAS,CAAC;AAAAC,OAAA,CAAAC,OAAA,GAAAJ,QAAA"}
+{"version":3,"names":["_reactNative","require","_default","TurboModuleRegistry","get","exports","default"],"sourceRoot":"..\\..\\src","sources":["NativeRNLlama.ts"],"mappings":";;;;;;AACA,IAAAA,YAAA,GAAAC,OAAA;AAAkD,IAAAC,QAAA,GAwMnCC,gCAAmB,CAACC,GAAG,CAAO,SAAS,CAAC;AAAAC,OAAA,CAAAC,OAAA,GAAAJ,QAAA"}
package/lib/commonjs/index.js CHANGED
@@ -18,6 +18,7 @@ Object.defineProperty(exports, "convertJsonSchemaToGrammar", {
 });
 exports.getCpuFeatures = getCpuFeatures;
 exports.initLlama = initLlama;
+exports.loadLlamaModelInfo = loadLlamaModelInfo;
 exports.releaseAllLlama = releaseAllLlama;
 exports.setContextLimit = setContextLimit;
 var _reactNative = require("react-native");
@@ -25,8 +26,8 @@ var _NativeRNLlama = _interopRequireDefault(require("./NativeRNLlama"));
 var _grammar = require("./grammar");
 var _chat = require("./chat");
 function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; }
+const EVENT_ON_INIT_CONTEXT_PROGRESS = '@RNLlama_onInitContextProgress';
 const EVENT_ON_TOKEN = '@RNLlama_onToken';
-const EVENT_ON_MODEL_PROGRESS = '@RNLlama_onModelProgress';
 let EventEmitter;
 if (_reactNative.Platform.OS === 'ios') {
   // @ts-ignore
@@ -67,16 +68,18 @@ class LlamaContext {
   async saveSession(filepath, options) {
     return _NativeRNLlama.default.saveSession(this.id, filepath, (options === null || options === void 0 ? void 0 : options.tokenSize) || -1);
   }
-  async getFormattedChat(messages) {
+  async getFormattedChat(messages, template) {
     var _this$model;
     const chat = (0, _chat.formatChat)(messages);
-    return _NativeRNLlama.default.getFormattedChat(this.id, chat, (_this$model = this.model) !== null && _this$model !== void 0 && _this$model.isChatTemplateSupported ? undefined : 'chatml');
+    let tmpl = (_this$model = this.model) !== null && _this$model !== void 0 && _this$model.isChatTemplateSupported ? undefined : 'chatml';
+    if (template) tmpl = template; // Force replace if provided
+    return _NativeRNLlama.default.getFormattedChat(this.id, chat, tmpl);
   }
   async completion(params, callback) {
     let finalPrompt = params.prompt;
     if (params.messages) {
       // messages always win
-      finalPrompt = await this.getFormattedChat(params.messages);
+      finalPrompt = await this.getFormattedChat(params.messages, params.chatTemplate);
     }
     let tokenListener = callback && EventEmitter.addListener(EVENT_ON_TOKEN, evt => {
       const {
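
`getFormattedChat` now accepts an optional template that overrides the model's built-in one, and `completion` forwards `params.chatTemplate` through to it. For example:

```ts
const res = await context.completion({
  messages: [{ role: 'user', content: 'Hi!' }],
  chatTemplate: 'chatml', // optional: force a template instead of the model default
})
```
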
@@ -116,8 +119,8 @@
   detokenize(tokens) {
     return _NativeRNLlama.default.detokenize(this.id, tokens);
   }
-  embedding(text) {
-    return _NativeRNLlama.default.embedding(this.id, text);
+  embedding(text, params) {
+    return _NativeRNLlama.default.embedding(this.id, text, params || {});
   }
   async bench(pp, tg, pl, nr) {
     const result = await _NativeRNLlama.default.bench(this.id, pp, tg, pl, nr);
@@ -143,30 +146,64 @@ async function getCpuFeatures() {
 async function setContextLimit(limit) {
   return _NativeRNLlama.default.setContextLimit(limit);
 }
-async function initLlama(_ref2, progressCallback) {
+let contextIdCounter = 0;
+const contextIdRandom = () => process.env.NODE_ENV === 'test' ? 0 : Math.floor(Math.random() * 100000);
+const modelInfoSkip = [
+// Large fields
+'tokenizer.ggml.tokens', 'tokenizer.ggml.token_type', 'tokenizer.ggml.merges'];
+async function loadLlamaModelInfo(model) {
+  let path = model;
+  if (path.startsWith('file://')) path = path.slice(7);
+  return _NativeRNLlama.default.modelInfo(path, modelInfoSkip);
+}
+const poolTypeMap = {
+  // -1 is unspecified as undefined
+  none: 0,
+  mean: 1,
+  cls: 2,
+  last: 3,
+  rank: 4
+};
+async function initLlama(_ref2, onProgress) {
+  var _loraPath, _removeProgressListen2;
   let {
     model,
     is_model_asset: isModelAsset,
+    pooling_type: poolingType,
+    lora,
     ...rest
   } = _ref2;
   let path = model;
   if (path.startsWith('file://')) path = path.slice(7);
-  const modelProgressListener = EventEmitter.addListener(EVENT_ON_MODEL_PROGRESS, event => {
-    if (event.progress && progressCallback) progressCallback(event.progress);
-    if (event.progress === 100) {
-      modelProgressListener.remove();
-    }
-  });
+  let loraPath = lora;
+  if ((_loraPath = loraPath) !== null && _loraPath !== void 0 && _loraPath.startsWith('file://')) loraPath = loraPath.slice(7);
+  const contextId = contextIdCounter + contextIdRandom();
+  contextIdCounter += 1;
+  let removeProgressListener = null;
+  if (onProgress) {
+    removeProgressListener = EventEmitter.addListener(EVENT_ON_INIT_CONTEXT_PROGRESS, evt => {
+      if (evt.contextId !== contextId) return;
+      onProgress(evt.progress);
+    });
+  }
+  const poolType = poolTypeMap[poolingType];
   const {
-    contextId,
     gpu,
     reasonNoGPU,
     model: modelDetails
-  } = await _NativeRNLlama.default.initContext({
+  } = await _NativeRNLlama.default.initContext(contextId, {
     model: path,
     is_model_asset: !!isModelAsset,
+    use_progress_callback: !!onProgress,
+    pooling_type: poolType,
+    lora: loraPath,
     ...rest
+  }).catch(err => {
+    var _removeProgressListen;
+    (_removeProgressListen = removeProgressListener) === null || _removeProgressListen === void 0 ? void 0 : _removeProgressListen.remove();
+    throw err;
   });
+  (_removeProgressListen2 = removeProgressListener) === null || _removeProgressListen2 === void 0 ? void 0 : _removeProgressListen2.remove();
   return new LlamaContext({
     contextId,
     gpu,
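
The compiled output above shows the full new flow: the context id is a counter plus a random offset (the offset is pinned to 0 when NODE_ENV is 'test'), the progress listener is scoped to that id and removed once init settles, whether it succeeds or fails, `file://` prefixes are stripped from both the model and lora paths, and the string `pooling_type` is translated through `poolTypeMap`. Roughly, in TypeScript terms (UI callback hypothetical):

```ts
const context = await initLlama(
  {
    model: 'file:///data/models/model.gguf',  // file:// prefix is stripped
    lora: 'file:///data/models/adapter.gguf', // now stripped here as well
    pooling_type: 'mean',                     // one of none|mean|cls|last|rank
  },
  (progress) => updateLoadingBar(progress),   // hypothetical UI handler, 0-100
)
```
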
package/lib/commonjs/index.js.map CHANGED
@@ -1 +1 @@
- {"version":3,"names":["_reactNative","require","_NativeRNLlama","_interopRequireDefault","_grammar","_chat","obj","__esModule","default","EVENT_ON_TOKEN","EVENT_ON_MODEL_PROGRESS","EventEmitter","Platform","OS","NativeEventEmitter","RNLlama","DeviceEventEmitter","LlamaContext","gpu","reasonNoGPU","model","constructor","_ref","contextId","id","loadSession","filepath","path","startsWith","slice","saveSession","options","tokenSize","getFormattedChat","messages","_this$model","chat","formatChat","isChatTemplateSupported","undefined","completion","params","callback","finalPrompt","prompt","tokenListener","addListener","evt","tokenResult","Error","promise","emit_partial_completion","then","completionResult","_tokenListener","remove","catch","err","_tokenListener2","stopCompletion","tokenizeAsync","text","tokenizeSync","detokenize","tokens","embedding","bench","pp","tg","pl","nr","result","modelDesc","modelSize","modelNParams","ppAvg","ppStd","tgAvg","tgStd","JSON","parse","release","releaseContext","exports","getCpuFeatures","setContextLimit","limit","initLlama","_ref2","progressCallback","is_model_asset","isModelAsset","rest","modelProgressListener","event","progress","modelDetails","initContext","releaseAllLlama","releaseAllContexts"],"sourceRoot":"..\\..\\src","sources":["index.ts"],"mappings":";;;;;;;;;;;;;;;;;;;;;;AAAA,IAAAA,YAAA,GAAAC,OAAA;AAEA,IAAAC,cAAA,GAAAC,sBAAA,CAAAF,OAAA;AAYA,IAAAG,QAAA,GAAAH,OAAA;AAEA,IAAAI,KAAA,GAAAJ,OAAA;AAAmC,SAAAE,uBAAAG,GAAA,WAAAA,GAAA,IAAAA,GAAA,CAAAC,UAAA,GAAAD,GAAA,KAAAE,OAAA,EAAAF,GAAA;AAInC,MAAMG,cAAc,GAAG,kBAAkB;AAEzC,MAAMC,uBAAuB,GAAG,0BAA0B;AAE1D,IAAIC,YAA2D;AAC/D,IAAIC,qBAAQ,CAACC,EAAE,KAAK,KAAK,EAAE;EACzB;EACAF,YAAY,GAAG,IAAIG,+BAAkB,CAACC,sBAAO,CAAC;AAChD;AACA,IAAIH,qBAAQ,CAACC,EAAE,KAAK,SAAS,EAAE;EAC7BF,YAAY,GAAGK,+BAAkB;AACnC;AAgCO,MAAMC,YAAY,CAAC;EAGxBC,GAAG,GAAY,KAAK;EAEpBC,WAAW,GAAW,EAAE;EAExBC,KAAK,GAED,CAAC,CAAC;EAENC,WAAWA,CAAAC,IAAA,EAA6D;IAAA,IAA5D;MAAEC,SAAS;MAAEL,GAAG;MAAEC,WAAW;MAAEC;IAA0B,CAAC,GAAAE,IAAA;IACpE,IAAI,CAACE,EAAE,GAAGD,SAAS;IACnB,IAAI,CAACL,GAAG,GAAGA,GAAG;IACd,IAAI,CAACC,WAAW,GAAGA,WAAW;IAC9B,IAAI,CAACC,KAAK,GAAGA,KAAK;EACpB;;EAEA;AACF;AACA;EACE,MAAMK,WAAWA,CAACC,QAAgB,EAAoC;IACpE,IAAIC,IAAI,GAAGD,QAAQ;IACnB,IAAIC,IAAI,CAACC,UAAU,CAAC,SAAS,CAAC,EAAED,IAAI,GAAGA,IAAI,CAACE,KAAK,CAAC,CAAC,CAAC;IACpD,OAAOd,sBAAO,CAACU,WAAW,CAAC,IAAI,CAACD,EAAE,EAAEG,IAAI,CAAC;EAC3C;;EAEA;AACF;AACA;EACE,MAAMG,WAAWA,CACfJ,QAAgB,EAChBK,OAA+B,EACd;IACjB,OAAOhB,sBAAO,CAACe,WAAW,CAAC,IAAI,CAACN,EAAE,EAAEE,QAAQ,EAAE,CAAAK,OAAO,aAAPA,OAAO,uBAAPA,OAAO,CAAEC,SAAS,KAAI,CAAC,CAAC,CAAC;EACzE;EAEA,MAAMC,gBAAgBA,CACpBC,QAAuC,EACtB;IAAA,IAAAC,WAAA;IACjB,MAAMC,IAAI,GAAG,IAAAC,gBAAU,EAACH,QAAQ,CAAC;IACjC,OAAOnB,sBAAO,CAACkB,gBAAgB,CAC7B,IAAI,CAACT,EAAE,EACPY,IAAI,EACJ,CAAAD,WAAA,OAAI,CAACf,KAAK,cAAAe,WAAA,eAAVA,WAAA,CAAYG,uBAAuB,GAAGC,SAAS,GAAG,QACpD,CAAC;EACH;EAEA,MAAMC,UAAUA,CACdC,MAAwB,EACxBC,QAAoC,EACH;IAEjC,IAAIC,WAAW,GAAGF,MAAM,CAACG,MAAM;IAC/B,IAAIH,MAAM,CAACP,QAAQ,EAAE;MAAE;MACrBS,WAAW,GAAG,MAAM,IAAI,CAACV,gBAAgB,CAACQ,MAAM,CAACP,QAAQ,CAAC;IAC5D;IAEA,IAAIW,aAAkB,GACpBH,QAAQ,IACR/B,YAAY,CAACmC,WAAW,CAACrC,cAAc,EAAGsC,GAAqB,IAAK;MAClE,MAAM;QAAExB,SAAS;QAAEyB;MAAY,CAAC,GAAGD,GAAG;MACtC,IAAIxB,SAAS,KAAK,IAAI,CAACC,EAAE,EAAE;MAC3BkB,QAAQ,CAACM,WAAW,CAAC;IACvB,CAAC,CAAC;IAEJ,IAAI,CAACL,WAAW,EAAE,MAAM,IAAIM,KAAK,CAAC,oBAAoB,CAAC;IACvD,MAAMC,OAAO,GAAGnC,sBAAO,CAACyB,UAAU,CAAC,IAAI,CAAChB,EAAE,EAAE;MAC1C,GAAGiB,MAAM;MACTG,MAAM,EAAED,WAAW;MACnBQ,uBAAuB,EAAE,CAAC,CAACT;IAC7B,CAAC,CAAC;IACF,OAAOQ,OAAO,CACXE,IAAI,CAAEC,gBAAgB,IAAK;MAAA,IAAAC,cAAA;MAC1B,CAAAA,cAAA
,GAAAT,aAAa,cAAAS,cAAA,uBAAbA,cAAA,CAAeC,MAAM,CAAC,CAAC;MACvBV,aAAa,GAAG,IAAI;MACpB,OAAOQ,gBAAgB;IACzB,CAAC,CAAC,CACDG,KAAK,CAAEC,GAAQ,IAAK;MAAA,IAAAC,eAAA;MACnB,CAAAA,eAAA,GAAAb,aAAa,cAAAa,eAAA,uBAAbA,eAAA,CAAeH,MAAM,CAAC,CAAC;MACvBV,aAAa,GAAG,IAAI;MACpB,MAAMY,GAAG;IACX,CAAC,CAAC;EACN;EAEAE,cAAcA,CAAA,EAAkB;IAC9B,OAAO5C,sBAAO,CAAC4C,cAAc,CAAC,IAAI,CAACnC,EAAE,CAAC;EACxC;EAEAoC,aAAaA,CAACC,IAAY,EAAiC;IACzD,OAAO9C,sBAAO,CAAC6C,aAAa,CAAC,IAAI,CAACpC,EAAE,EAAEqC,IAAI,CAAC;EAC7C;EAEAC,YAAYA,CAACD,IAAY,EAAwB;IAC/C,OAAO9C,sBAAO,CAAC+C,YAAY,CAAC,IAAI,CAACtC,EAAE,EAAEqC,IAAI,CAAC;EAC5C;EAEAE,UAAUA,CAACC,MAAgB,EAAmB;IAC5C,OAAOjD,sBAAO,CAACgD,UAAU,CAAC,IAAI,CAACvC,EAAE,EAAEwC,MAAM,CAAC;EAC5C;EAEAC,SAASA,CAACJ,IAAY,EAAkC;IACtD,OAAO9C,sBAAO,CAACkD,SAAS,CAAC,IAAI,CAACzC,EAAE,EAAEqC,IAAI,CAAC;EACzC;EAEA,MAAMK,KAAKA,CACTC,EAAU,EACVC,EAAU,EACVC,EAAU,EACVC,EAAU,EACY;IACtB,MAAMC,MAAM,GAAG,MAAMxD,sBAAO,CAACmD,KAAK,CAAC,IAAI,CAAC1C,EAAE,EAAE2C,EAAE,EAAEC,EAAE,EAAEC,EAAE,EAAEC,EAAE,CAAC;IAC3D,MAAM,CAACE,SAAS,EAAEC,SAAS,EAAEC,YAAY,EAAEC,KAAK,EAAEC,KAAK,EAAEC,KAAK,EAAEC,KAAK,CAAC,GACpEC,IAAI,CAACC,KAAK,CAACT,MAAM,CAAC;IACpB,OAAO;MACLC,SAAS;MACTC,SAAS;MACTC,YAAY;MACZC,KAAK;MACLC,KAAK;MACLC,KAAK;MACLC;IACF,CAAC;EACH;EAEA,MAAMG,OAAOA,CAAA,EAAkB;IAC7B,OAAOlE,sBAAO,CAACmE,cAAc,CAAC,IAAI,CAAC1D,EAAE,CAAC;EACxC;AACF;AAAC2D,OAAA,CAAAlE,YAAA,GAAAA,YAAA;AAEM,eAAemE,cAAcA,CAAA,EAAgC;EAClE,OAAOrE,sBAAO,CAACqE,cAAc,CAAC,CAAC;AACjC;AAEO,eAAeC,eAAeA,CAACC,KAAa,EAAiB;EAClE,OAAOvE,sBAAO,CAACsE,eAAe,CAACC,KAAK,CAAC;AACvC;AAEO,eAAeC,SAASA,CAAAC,KAAA,EAK7BC,gBAA6C,EACtB;EAAA,IANO;IAC5BrE,KAAK;IACLsE,cAAc,EAAEC,YAAY;IAC5B,GAAGC;EACU,CAAC,GAAAJ,KAAA;EAGhB,IAAI7D,IAAI,GAAGP,KAAK;EAChB,IAAIO,IAAI,CAACC,UAAU,CAAC,SAAS,CAAC,EAAED,IAAI,GAAGA,IAAI,CAACE,KAAK,CAAC,CAAC,CAAC;EAEpD,MAAMgE,qBAAqB,GAAGlF,YAAY,CAACmC,WAAW,CAACpC,uBAAuB,EAAGoF,KAAK,IAAK;IACzF,IAAGA,KAAK,CAACC,QAAQ,IAAIN,gBAAgB,EACnCA,gBAAgB,CAACK,KAAK,CAACC,QAAQ,CAAC;IAClC,IAAGD,KAAK,CAACC,QAAQ,KAAK,GAAG,EAAE;MACzBF,qBAAqB,CAACtC,MAAM,CAAC,CAAC;IAChC;EACF,CAAC,CAAC;EAEF,MAAM;IACJhC,SAAS;IACTL,GAAG;IACHC,WAAW;IACXC,KAAK,EAAE4E;EACT,CAAC,GAAG,MAAMjF,sBAAO,CAACkF,WAAW,CAAC;IAC5B7E,KAAK,EAAEO,IAAI;IACX+D,cAAc,EAAE,CAAC,CAACC,YAAY;IAC9B,GAAGC;EACL,CAAC,CAAC;EAEF,OAAO,IAAI3E,YAAY,CAAC;IAAEM,SAAS;IAAEL,GAAG;IAAEC,WAAW;IAAEC,KAAK,EAAE4E;EAAa,CAAC,CAAC;AAC/E;AAEO,eAAeE,eAAeA,CAAA,EAAkB;EACrD,OAAOnF,sBAAO,CAACoF,kBAAkB,CAAC,CAAC;AACrC"}
+ {"version":3,"names":["_reactNative","require","_NativeRNLlama","_interopRequireDefault","_grammar","_chat","obj","__esModule","default","EVENT_ON_INIT_CONTEXT_PROGRESS","EVENT_ON_TOKEN","EventEmitter","Platform","OS","NativeEventEmitter","RNLlama","DeviceEventEmitter","LlamaContext","gpu","reasonNoGPU","model","constructor","_ref","contextId","id","loadSession","filepath","path","startsWith","slice","saveSession","options","tokenSize","getFormattedChat","messages","template","_this$model","chat","formatChat","tmpl","isChatTemplateSupported","undefined","completion","params","callback","finalPrompt","prompt","chatTemplate","tokenListener","addListener","evt","tokenResult","Error","promise","emit_partial_completion","then","completionResult","_tokenListener","remove","catch","err","_tokenListener2","stopCompletion","tokenizeAsync","text","tokenizeSync","detokenize","tokens","embedding","bench","pp","tg","pl","nr","result","modelDesc","modelSize","modelNParams","ppAvg","ppStd","tgAvg","tgStd","JSON","parse","release","releaseContext","exports","getCpuFeatures","setContextLimit","limit","contextIdCounter","contextIdRandom","process","env","NODE_ENV","Math","floor","random","modelInfoSkip","loadLlamaModelInfo","modelInfo","poolTypeMap","none","mean","cls","last","rank","initLlama","_ref2","onProgress","_loraPath","_removeProgressListen2","is_model_asset","isModelAsset","pooling_type","poolingType","lora","rest","loraPath","removeProgressListener","progress","poolType","modelDetails","initContext","use_progress_callback","_removeProgressListen","releaseAllLlama","releaseAllContexts"],"sourceRoot":"..\\..\\src","sources":["index.ts"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;AAAA,IAAAA,YAAA,GAAAC,OAAA;AAEA,IAAAC,cAAA,GAAAC,sBAAA,CAAAF,OAAA;AAaA,IAAAG,QAAA,GAAAH,OAAA;AAEA,IAAAI,KAAA,GAAAJ,OAAA;AAAmC,SAAAE,uBAAAG,GAAA,WAAAA,GAAA,IAAAA,GAAA,CAAAC,UAAA,GAAAD,GAAA,KAAAE,OAAA,EAAAF,GAAA;AAInC,MAAMG,8BAA8B,GAAG,gCAAgC;AACvE,MAAMC,cAAc,GAAG,kBAAkB;AAEzC,IAAIC,YAA2D;AAC/D,IAAIC,qBAAQ,CAACC,EAAE,KAAK,KAAK,EAAE;EACzB;EACAF,YAAY,GAAG,IAAIG,+BAAkB,CAACC,sBAAO,CAAC;AAChD;AACA,IAAIH,qBAAQ,CAACC,EAAE,KAAK,SAAS,EAAE;EAC7BF,YAAY,GAAGK,+BAAkB;AACnC;AA0CO,MAAMC,YAAY,CAAC;EAGxBC,GAAG,GAAY,KAAK;EAEpBC,WAAW,GAAW,EAAE;EAExBC,KAAK,GAED,CAAC,CAAC;EAENC,WAAWA,CAAAC,IAAA,EAA6D;IAAA,IAA5D;MAAEC,SAAS;MAAEL,GAAG;MAAEC,WAAW;MAAEC;IAA0B,CAAC,GAAAE,IAAA;IACpE,IAAI,CAACE,EAAE,GAAGD,SAAS;IACnB,IAAI,CAACL,GAAG,GAAGA,GAAG;IACd,IAAI,CAACC,WAAW,GAAGA,WAAW;IAC9B,IAAI,CAACC,KAAK,GAAGA,KAAK;EACpB;;EAEA;AACF;AACA;EACE,MAAMK,WAAWA,CAACC,QAAgB,EAAoC;IACpE,IAAIC,IAAI,GAAGD,QAAQ;IACnB,IAAIC,IAAI,CAACC,UAAU,CAAC,SAAS,CAAC,EAAED,IAAI,GAAGA,IAAI,CAACE,KAAK,CAAC,CAAC,CAAC;IACpD,OAAOd,sBAAO,CAACU,WAAW,CAAC,IAAI,CAACD,EAAE,EAAEG,IAAI,CAAC;EAC3C;;EAEA;AACF;AACA;EACE,MAAMG,WAAWA,CACfJ,QAAgB,EAChBK,OAA+B,EACd;IACjB,OAAOhB,sBAAO,CAACe,WAAW,CAAC,IAAI,CAACN,EAAE,EAAEE,QAAQ,EAAE,CAAAK,OAAO,aAAPA,OAAO,uBAAPA,OAAO,CAAEC,SAAS,KAAI,CAAC,CAAC,CAAC;EACzE;EAEA,MAAMC,gBAAgBA,CACpBC,QAAuC,EACvCC,QAAiB,EACA;IAAA,IAAAC,WAAA;IACjB,MAAMC,IAAI,GAAG,IAAAC,gBAAU,EAACJ,QAAQ,CAAC;IACjC,IAAIK,IAAI,GAAG,CAAAH,WAAA,OAAI,CAAChB,KAAK,cAAAgB,WAAA,eAAVA,WAAA,CAAYI,uBAAuB,GAAGC,SAAS,GAAG,QAAQ;IACrE,IAAIN,QAAQ,EAAEI,IAAI,GAAGJ,QAAQ,EAAC;IAC9B,OAAOpB,sBAAO,CAACkB,gBAAgB,CAAC,IAAI,CAACT,EAAE,EAAEa,IAAI,EAAEE,IAAI,CAAC;EACtD;EAEA,MAAMG,UAAUA,CACdC,MAAwB,EACxBC,QAAoC,EACH;IACjC,IAAIC,WAAW,GAAGF,MAAM,CAACG,MAAM;IAC/B,IAAIH,MAAM,CAACT,QAAQ,EAAE;MACnB;MACAW,WAAW,GAAG,MAAM,IAAI,CAACZ,gBAAgB,CAACU,MAAM,CAACT,QAAQ,EAAES,MAAM,CAACI,YAAY,CAAC;IACjF;IAEA,IAAIC,aAAkB,GACpBJ,QAAQ,IACRjC,YAAY,CAACsC,WAA
W,CAACvC,cAAc,EAAGwC,GAAqB,IAAK;MAClE,MAAM;QAAE3B,SAAS;QAAE4B;MAAY,CAAC,GAAGD,GAAG;MACtC,IAAI3B,SAAS,KAAK,IAAI,CAACC,EAAE,EAAE;MAC3BoB,QAAQ,CAACO,WAAW,CAAC;IACvB,CAAC,CAAC;IAEJ,IAAI,CAACN,WAAW,EAAE,MAAM,IAAIO,KAAK,CAAC,oBAAoB,CAAC;IACvD,MAAMC,OAAO,GAAGtC,sBAAO,CAAC2B,UAAU,CAAC,IAAI,CAAClB,EAAE,EAAE;MAC1C,GAAGmB,MAAM;MACTG,MAAM,EAAED,WAAW;MACnBS,uBAAuB,EAAE,CAAC,CAACV;IAC7B,CAAC,CAAC;IACF,OAAOS,OAAO,CACXE,IAAI,CAAEC,gBAAgB,IAAK;MAAA,IAAAC,cAAA;MAC1B,CAAAA,cAAA,GAAAT,aAAa,cAAAS,cAAA,uBAAbA,cAAA,CAAeC,MAAM,CAAC,CAAC;MACvBV,aAAa,GAAG,IAAI;MACpB,OAAOQ,gBAAgB;IACzB,CAAC,CAAC,CACDG,KAAK,CAAEC,GAAQ,IAAK;MAAA,IAAAC,eAAA;MACnB,CAAAA,eAAA,GAAAb,aAAa,cAAAa,eAAA,uBAAbA,eAAA,CAAeH,MAAM,CAAC,CAAC;MACvBV,aAAa,GAAG,IAAI;MACpB,MAAMY,GAAG;IACX,CAAC,CAAC;EACN;EAEAE,cAAcA,CAAA,EAAkB;IAC9B,OAAO/C,sBAAO,CAAC+C,cAAc,CAAC,IAAI,CAACtC,EAAE,CAAC;EACxC;EAEAuC,aAAaA,CAACC,IAAY,EAAiC;IACzD,OAAOjD,sBAAO,CAACgD,aAAa,CAAC,IAAI,CAACvC,EAAE,EAAEwC,IAAI,CAAC;EAC7C;EAEAC,YAAYA,CAACD,IAAY,EAAwB;IAC/C,OAAOjD,sBAAO,CAACkD,YAAY,CAAC,IAAI,CAACzC,EAAE,EAAEwC,IAAI,CAAC;EAC5C;EAEAE,UAAUA,CAACC,MAAgB,EAAmB;IAC5C,OAAOpD,sBAAO,CAACmD,UAAU,CAAC,IAAI,CAAC1C,EAAE,EAAE2C,MAAM,CAAC;EAC5C;EAEAC,SAASA,CACPJ,IAAY,EACZrB,MAAwB,EACQ;IAChC,OAAO5B,sBAAO,CAACqD,SAAS,CAAC,IAAI,CAAC5C,EAAE,EAAEwC,IAAI,EAAErB,MAAM,IAAI,CAAC,CAAC,CAAC;EACvD;EAEA,MAAM0B,KAAKA,CACTC,EAAU,EACVC,EAAU,EACVC,EAAU,EACVC,EAAU,EACY;IACtB,MAAMC,MAAM,GAAG,MAAM3D,sBAAO,CAACsD,KAAK,CAAC,IAAI,CAAC7C,EAAE,EAAE8C,EAAE,EAAEC,EAAE,EAAEC,EAAE,EAAEC,EAAE,CAAC;IAC3D,MAAM,CAACE,SAAS,EAAEC,SAAS,EAAEC,YAAY,EAAEC,KAAK,EAAEC,KAAK,EAAEC,KAAK,EAAEC,KAAK,CAAC,GACpEC,IAAI,CAACC,KAAK,CAACT,MAAM,CAAC;IACpB,OAAO;MACLC,SAAS;MACTC,SAAS;MACTC,YAAY;MACZC,KAAK;MACLC,KAAK;MACLC,KAAK;MACLC;IACF,CAAC;EACH;EAEA,MAAMG,OAAOA,CAAA,EAAkB;IAC7B,OAAOrE,sBAAO,CAACsE,cAAc,CAAC,IAAI,CAAC7D,EAAE,CAAC;EACxC;AACF;AAAC8D,OAAA,CAAArE,YAAA,GAAAA,YAAA;AAEM,eAAesE,cAAcA,CAAA,EAAgC;EAClE,OAAOxE,sBAAO,CAACwE,cAAc,CAAC,CAAC;AACjC;AAEO,eAAeC,eAAeA,CAACC,KAAa,EAAiB;EAClE,OAAO1E,sBAAO,CAACyE,eAAe,CAACC,KAAK,CAAC;AACvC;AAEA,IAAIC,gBAAgB,GAAG,CAAC;AACxB,MAAMC,eAAe,GAAGA,CAAA,KACtBC,OAAO,CAACC,GAAG,CAACC,QAAQ,KAAK,MAAM,GAAG,CAAC,GAAGC,IAAI,CAACC,KAAK,CAACD,IAAI,CAACE,MAAM,CAAC,CAAC,GAAG,MAAM,CAAC;AAE1E,MAAMC,aAAa,GAAG;AACpB;AACA,uBAAuB,EACvB,2BAA2B,EAC3B,uBAAuB,CACxB;AACM,eAAeC,kBAAkBA,CAAC/E,KAAa,EAAmB;EACvE,IAAIO,IAAI,GAAGP,KAAK;EAChB,IAAIO,IAAI,CAACC,UAAU,CAAC,SAAS,CAAC,EAAED,IAAI,GAAGA,IAAI,CAACE,KAAK,CAAC,CAAC,CAAC;EACpD,OAAOd,sBAAO,CAACqF,SAAS,CAACzE,IAAI,EAAEuE,aAAa,CAAC;AAC/C;AAEA,MAAMG,WAAW,GAAG;EAClB;EACAC,IAAI,EAAE,CAAC;EACPC,IAAI,EAAE,CAAC;EACPC,GAAG,EAAE,CAAC;EACNC,IAAI,EAAE,CAAC;EACPC,IAAI,EAAE;AACR,CAAC;AAEM,eAAeC,SAASA,CAAAC,KAAA,EAQ7BC,UAAuC,EAChB;EAAA,IAAAC,SAAA,EAAAC,sBAAA;EAAA,IARvB;IACE3F,KAAK;IACL4F,cAAc,EAAEC,YAAY;IAC5BC,YAAY,EAAEC,WAAW;IACzBC,IAAI;IACJ,GAAGC;EACU,CAAC,GAAAT,KAAA;EAGhB,IAAIjF,IAAI,GAAGP,KAAK;EAChB,IAAIO,IAAI,CAACC,UAAU,CAAC,SAAS,CAAC,EAAED,IAAI,GAAGA,IAAI,CAACE,KAAK,CAAC,CAAC,CAAC;EAEpD,IAAIyF,QAAQ,GAAGF,IAAI;EACnB,KAAAN,SAAA,GAAIQ,QAAQ,cAAAR,SAAA,eAARA,SAAA,CAAUlF,UAAU,CAAC,SAAS,CAAC,EAAE0F,QAAQ,GAAGA,QAAQ,CAACzF,KAAK,CAAC,CAAC,CAAC;EAEjE,MAAMN,SAAS,GAAGmE,gBAAgB,GAAGC,eAAe,CAAC,CAAC;EACtDD,gBAAgB,IAAI,CAAC;EAErB,IAAI6B,sBAA2B,GAAG,IAAI;EACtC,IAAIV,UAAU,EAAE;IACdU,sBAAsB,GAAG5G,YAAY,CAACsC,WAAW,CAC/CxC,8BAA8B,EAC7ByC,GAA4C,IAAK;MAChD,IAAIA,GAAG,CAAC3B,SAAS,KAAKA,SAAS,EAAE;MACjCsF,UAAU,CAAC3D,GAAG,CAACsE,QAAQ,CAAC;IAC1B,CACF,CAAC;EACH;EAEA,MAAMC,QAAQ,GAAGpB,WAAW,CAACc,WAAW,CAA6B;EACrE,MAAM;IACJjG,GAAG;IACHC,WAAW;IACXC,KAAK,EAAEsG;EACT,CAAC,GAAG,MAAM3G,sBAAO,CAAC4G,WAAW,CAACpG,SAAS,EAAE;IACvCH,KAAK,
EAAEO,IAAI;IACXqF,cAAc,EAAE,CAAC,CAACC,YAAY;IAC9BW,qBAAqB,EAAE,CAAC,CAACf,UAAU;IACnCK,YAAY,EAAEO,QAAQ;IACtBL,IAAI,EAAEE,QAAQ;IACd,GAAGD;EACL,CAAC,CAAC,CAAC1D,KAAK,CAAEC,GAAQ,IAAK;IAAA,IAAAiE,qBAAA;IACrB,CAAAA,qBAAA,GAAAN,sBAAsB,cAAAM,qBAAA,uBAAtBA,qBAAA,CAAwBnE,MAAM,CAAC,CAAC;IAChC,MAAME,GAAG;EACX,CAAC,CAAC;EACF,CAAAmD,sBAAA,GAAAQ,sBAAsB,cAAAR,sBAAA,uBAAtBA,sBAAA,CAAwBrD,MAAM,CAAC,CAAC;EAChC,OAAO,IAAIzC,YAAY,CAAC;IAAEM,SAAS;IAAEL,GAAG;IAAEC,WAAW;IAAEC,KAAK,EAAEsG;EAAa,CAAC,CAAC;AAC/E;AAEO,eAAeI,eAAeA,CAAA,EAAkB;EACrD,OAAO/G,sBAAO,CAACgH,kBAAkB,CAAC,CAAC;AACrC"}
package/lib/module/NativeRNLlama.js.map CHANGED
@@ -1 +1 @@
-{"version":3,"names":["TurboModuleRegistry","get"],"sourceRoot":"..\\..\\src","sources":["NativeRNLlama.ts"],"mappings":"AACA,SAASA,mBAAmB,QAAQ,cAAc;AAqKlD,eAAeA,mBAAmB,CAACC,GAAG,CAAO,SAAS,CAAC"}
+{"version":3,"names":["TurboModuleRegistry","get"],"sourceRoot":"..\\..\\src","sources":["NativeRNLlama.ts"],"mappings":"AACA,SAASA,mBAAmB,QAAQ,cAAc;AAwMlD,eAAeA,mBAAmB,CAACC,GAAG,CAAO,SAAS,CAAC"}