cui-llama.rn 1.2.3 → 1.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cpp/sampling.cpp CHANGED
@@ -98,8 +98,8 @@ struct ring_buffer {
98
98
  std::vector<T> data;
99
99
  };
100
100
 
101
- struct gpt_sampler {
102
- gpt_sampler_params params;
101
+ struct common_sampler {
102
+ common_sampler_params params;
103
103
 
104
104
  struct llama_sampler * grmr;
105
105
  struct llama_sampler * chain;
@@ -125,26 +125,28 @@ struct gpt_sampler {
125
125
  }
126
126
  };
127
127
 
128
- std::string gpt_sampler_params::print() const {
128
+ std::string common_sampler_params::print() const {
129
129
  char result[1024];
130
130
 
131
131
  snprintf(result, sizeof(result),
132
132
  "\trepeat_last_n = %d, repeat_penalty = %.3f, frequency_penalty = %.3f, presence_penalty = %.3f\n"
133
- "\ttop_k = %d, tfs_z = %.3f, top_p = %.3f, min_p = %.3f, typical_p = %.3f, temp = %.3f\n"
133
+ "\tdry_multiplier = %.3f, dry_base = %.3f, dry_allowed_length = %d, dry_penalty_last_n = %d\n"
134
+ "\ttop_k = %d, top_p = %.3f, min_p = %.3f, xtc_probability = %.3f, xtc_threshold = %.3f, typical_p = %.3f, temp = %.3f\n"
134
135
  "\tmirostat = %d, mirostat_lr = %.3f, mirostat_ent = %.3f",
135
136
  penalty_last_n, penalty_repeat, penalty_freq, penalty_present,
136
- top_k, tfs_z, top_p, min_p, typ_p, temp,
137
+ dry_multiplier, dry_base, dry_allowed_length, dry_penalty_last_n,
138
+ top_k, top_p, min_p, xtc_probability, xtc_threshold, typ_p, temp,
137
139
  mirostat, mirostat_eta, mirostat_tau);
138
140
 
139
141
  return std::string(result);
140
142
  }
141
143
 
142
- struct gpt_sampler * gpt_sampler_init(const struct llama_model * model, const struct gpt_sampler_params & params) {
144
+ struct common_sampler * common_sampler_init(const struct llama_model * model, const struct common_sampler_params & params) {
143
145
  llama_sampler_chain_params lparams = llama_sampler_chain_default_params();
144
146
 
145
147
  lparams.no_perf = params.no_perf;
146
148
 
147
- auto * result = new gpt_sampler {
149
+ auto * result = new common_sampler {
148
150
  /* .params = */ params,
149
151
  /* .grmr = */ llama_sampler_init_grammar(model, params.grammar.c_str(), "root"),
150
152
  /* .chain = */ llama_sampler_chain_init(lparams),
@@ -170,64 +172,61 @@ struct gpt_sampler * gpt_sampler_init(const struct llama_model * model, const st
170
172
  params.penalty_present,
171
173
  params.penalize_nl,
172
174
  params.ignore_eos));
173
-
174
- if (params.temp > 0.0f) {
175
- if (params.mirostat == 0) {
176
- for (const auto & cnstr : params.samplers) {
177
- switch (cnstr) {
178
- case GPT_SAMPLER_TYPE_TOP_K:
179
- llama_sampler_chain_add(result->chain, llama_sampler_init_top_k (params.top_k));
180
- break;
181
- case GPT_SAMPLER_TYPE_TOP_P:
182
- llama_sampler_chain_add(result->chain, llama_sampler_init_top_p (params.top_p, params.min_keep));
183
- break;
184
- case GPT_SAMPLER_TYPE_MIN_P:
185
- llama_sampler_chain_add(result->chain, llama_sampler_init_min_p (params.min_p, params.min_keep));
186
- break;
187
- case GPT_SAMPLER_TYPE_TFS_Z:
188
- llama_sampler_chain_add(result->chain, llama_sampler_init_tail_free(params.tfs_z, params.min_keep));
189
- break;
190
- case GPT_SAMPLER_TYPE_TYPICAL_P:
191
- llama_sampler_chain_add(result->chain, llama_sampler_init_typical (params.typ_p, params.min_keep));
192
- break;
193
- case GPT_SAMPLER_TYPE_XTC:
194
- llama_sampler_chain_add(result->chain, llama_sampler_init_xtc (params.xtc_p, params.xtc_t, params.min_keep, params.seed));
195
- break;
196
- case GPT_SAMPLER_TYPE_TEMPERATURE:
197
- llama_sampler_chain_add(result->chain, llama_sampler_init_temp_ext (params.temp, params.dynatemp_range, params.dynatemp_exponent));
175
+
176
+ if (params.mirostat == 0) {
177
+ for (const auto & cnstr : params.samplers) {
178
+ switch (cnstr) {
179
+ case COMMON_SAMPLER_TYPE_DRY:
180
+ {
181
+ std::vector<const char*> c_breakers;
182
+ c_breakers.reserve(params.dry_sequence_breakers.size());
183
+ for (const auto& str : params.dry_sequence_breakers) {
184
+ c_breakers.push_back(str.c_str());
185
+ }
186
+
187
+ llama_sampler_chain_add(result->chain, llama_sampler_init_dry (model, params.dry_multiplier, params.dry_base, params.dry_allowed_length, params.dry_penalty_last_n, c_breakers.data(), c_breakers.size()));
188
+ }
198
189
  break;
199
- default:
200
- LM_GGML_ASSERT(false && "unknown sampler type");
201
- }
190
+ case COMMON_SAMPLER_TYPE_TOP_K:
191
+ llama_sampler_chain_add(result->chain, llama_sampler_init_top_k (params.top_k));
192
+ break;
193
+ case COMMON_SAMPLER_TYPE_TOP_P:
194
+ llama_sampler_chain_add(result->chain, llama_sampler_init_top_p (params.top_p, params.min_keep));
195
+ break;
196
+ case COMMON_SAMPLER_TYPE_MIN_P:
197
+ llama_sampler_chain_add(result->chain, llama_sampler_init_min_p (params.min_p, params.min_keep));
198
+ break;
199
+ case COMMON_SAMPLER_TYPE_XTC:
200
+ llama_sampler_chain_add(result->chain, llama_sampler_init_xtc (params.xtc_probability, params.xtc_threshold, params.min_keep, params.seed));
201
+ break;
202
+ case COMMON_SAMPLER_TYPE_TYPICAL_P:
203
+ llama_sampler_chain_add(result->chain, llama_sampler_init_typical (params.typ_p, params.min_keep));
204
+ break;
205
+ case COMMON_SAMPLER_TYPE_TEMPERATURE:
206
+ llama_sampler_chain_add(result->chain, llama_sampler_init_temp_ext (params.temp, params.dynatemp_range, params.dynatemp_exponent));
207
+ break;
208
+ case COMMON_SAMPLER_TYPE_INFILL:
209
+ llama_sampler_chain_add(result->chain, llama_sampler_init_infill (model));
210
+ break;
211
+ default:
212
+ LM_GGML_ASSERT(false && "unknown sampler type");
202
213
  }
203
- llama_sampler_chain_add(result->chain, llama_sampler_init_softmax());
204
- llama_sampler_chain_add(result->chain, llama_sampler_init_dist(params.seed));
205
- } else if (params.mirostat == 1) {
206
- llama_sampler_chain_add(result->chain, llama_sampler_init_temp(params.temp));
207
- llama_sampler_chain_add(result->chain, llama_sampler_init_mirostat(llama_n_vocab(model), params.seed, params.mirostat_tau, params.mirostat_eta, 100));
208
- } else if (params.mirostat == 2) {
209
- llama_sampler_chain_add(result->chain, llama_sampler_init_temp(params.temp));
210
- llama_sampler_chain_add(result->chain, llama_sampler_init_mirostat_v2(params.seed, params.mirostat_tau, params.mirostat_eta));
211
- } else {
212
- LM_GGML_ASSERT(false && "unknown mirostat version");
213
214
  }
215
+ llama_sampler_chain_add(result->chain, llama_sampler_init_dist(params.seed));
216
+ } else if (params.mirostat == 1) {
217
+ llama_sampler_chain_add(result->chain, llama_sampler_init_temp(params.temp));
218
+ llama_sampler_chain_add(result->chain, llama_sampler_init_mirostat(llama_n_vocab(model), params.seed, params.mirostat_tau, params.mirostat_eta, 100));
219
+ } else if (params.mirostat == 2) {
220
+ llama_sampler_chain_add(result->chain, llama_sampler_init_temp(params.temp));
221
+ llama_sampler_chain_add(result->chain, llama_sampler_init_mirostat_v2(params.seed, params.mirostat_tau, params.mirostat_eta));
214
222
  } else {
215
- if (params.n_probs > 0) {
216
- // some use cases require to sample greedily, but still obtain the probabilities of the top tokens
217
- // ref: https://github.com/ggerganov/llama.cpp/pull/9605
218
- //
219
- // the following will not produce exactly the same probs as applyging softmax to the full vocabulary, but
220
- // it is much faster, since we avoid sorting all tokens and should give a good approximation
221
- llama_sampler_chain_add(result->chain, llama_sampler_init_top_k(params.n_probs));
222
- llama_sampler_chain_add(result->chain, llama_sampler_init_softmax());
223
- }
224
- llama_sampler_chain_add(result->chain, llama_sampler_init_greedy());
223
+ LM_GGML_ASSERT(false && "unknown mirostat version");
225
224
  }
226
225
 
227
226
  return result;
228
227
  }
229
228
 
230
- void gpt_sampler_free(struct gpt_sampler * gsmpl) {
229
+ void common_sampler_free(struct common_sampler * gsmpl) {
231
230
  if (gsmpl) {
232
231
  llama_sampler_free(gsmpl->grmr);
233
232
 
@@ -237,7 +236,7 @@ void gpt_sampler_free(struct gpt_sampler * gsmpl) {
237
236
  }
238
237
  }
239
238
 
240
- void gpt_sampler_accept(struct gpt_sampler * gsmpl, llama_token token, bool accept_grammar) {
239
+ void common_sampler_accept(struct common_sampler * gsmpl, llama_token token, bool accept_grammar) {
241
240
  if (accept_grammar) {
242
241
  llama_sampler_accept(gsmpl->grmr, token);
243
242
  }
@@ -247,14 +246,14 @@ void gpt_sampler_accept(struct gpt_sampler * gsmpl, llama_token token, bool acce
247
246
  gsmpl->prev.push_back(token);
248
247
  }
249
248
 
250
- void gpt_sampler_reset(struct gpt_sampler * gsmpl) {
249
+ void common_sampler_reset(struct common_sampler * gsmpl) {
251
250
  llama_sampler_reset(gsmpl->grmr);
252
251
 
253
252
  llama_sampler_reset(gsmpl->chain);
254
253
  }
255
254
 
256
- struct gpt_sampler * gpt_sampler_clone(gpt_sampler * gsmpl) {
257
- return new gpt_sampler {
255
+ struct common_sampler * common_sampler_clone(common_sampler * gsmpl) {
256
+ return new common_sampler {
258
257
  /* .params = */ gsmpl->params,
259
258
  /* .grmr = */ llama_sampler_clone(gsmpl->grmr),
260
259
  /* .chain = */ llama_sampler_clone(gsmpl->chain),
@@ -264,7 +263,7 @@ struct gpt_sampler * gpt_sampler_clone(gpt_sampler * gsmpl) {
264
263
  };
265
264
  }
266
265
 
267
- void gpt_perf_print(const struct llama_context * ctx, const struct gpt_sampler * gsmpl) {
266
+ void common_perf_print(const struct llama_context * ctx, const struct common_sampler * gsmpl) {
268
267
  // TODO: measure grammar performance
269
268
 
270
269
  if (gsmpl) {
@@ -275,7 +274,7 @@ void gpt_perf_print(const struct llama_context * ctx, const struct gpt_sampler *
275
274
  }
276
275
  }
277
276
 
278
- llama_token gpt_sampler_sample(struct gpt_sampler * gsmpl, struct llama_context * ctx, int idx, bool grammar_first) {
277
+ llama_token common_sampler_sample(struct common_sampler * gsmpl, struct llama_context * ctx, int idx, bool grammar_first) {
279
278
  gsmpl->set_logits(ctx, idx);
280
279
 
281
280
  auto & grmr = gsmpl->grmr;
@@ -321,21 +320,21 @@ llama_token gpt_sampler_sample(struct gpt_sampler * gsmpl, struct llama_context
321
320
  return cur_p.data[cur_p.selected].id;
322
321
  }
323
322
 
324
- uint32_t gpt_sampler_get_seed(const struct gpt_sampler * gsmpl) {
323
+ uint32_t common_sampler_get_seed(const struct common_sampler * gsmpl) {
325
324
  return llama_sampler_get_seed(gsmpl->chain);
326
325
  }
327
326
 
328
327
  // helpers
329
328
 
330
- llama_token_data_array * gpt_sampler_get_candidates(struct gpt_sampler * gsmpl) {
329
+ llama_token_data_array * common_sampler_get_candidates(struct common_sampler * gsmpl) {
331
330
  return &gsmpl->cur_p;
332
331
  }
333
332
 
334
- llama_token gpt_sampler_last(const struct gpt_sampler * gsmpl) {
333
+ llama_token common_sampler_last(const struct common_sampler * gsmpl) {
335
334
  return gsmpl->prev.rat(0);
336
335
  }
337
336
 
338
- std::string gpt_sampler_print(const struct gpt_sampler * gsmpl) {
337
+ std::string common_sampler_print(const struct common_sampler * gsmpl) {
339
338
  std::string result = "logits ";
340
339
 
341
340
  for (int i = 0; i < llama_sampler_chain_n(gsmpl->chain); i++) {
@@ -346,7 +345,7 @@ std::string gpt_sampler_print(const struct gpt_sampler * gsmpl) {
346
345
  return result;
347
346
  }
348
347
 
349
- std::string gpt_sampler_prev_str(gpt_sampler * gsmpl, llama_context * ctx_main, int n) {
348
+ std::string common_sampler_prev_str(common_sampler * gsmpl, llama_context * ctx_main, int n) {
350
349
  n = std::min(n, (int) gsmpl->prev.size());
351
350
 
352
351
  if (n <= 0) {
@@ -361,68 +360,67 @@ std::string gpt_sampler_prev_str(gpt_sampler * gsmpl, llama_context * ctx_main,
361
360
 
362
361
  LM_GGML_ASSERT(id != LLAMA_TOKEN_NULL && "null token in the sampling history - should not happen");
363
362
 
364
- result += llama_token_to_piece(ctx_main, id);
363
+ result += common_token_to_piece(ctx_main, id);
365
364
  }
366
365
 
367
366
  return result;
368
367
  }
369
368
 
370
- char gpt_sampler_type_to_chr(enum gpt_sampler_type cnstr) {
369
+ char common_sampler_type_to_chr(enum common_sampler_type cnstr) {
371
370
  switch (cnstr) {
372
- case GPT_SAMPLER_TYPE_TOP_K: return 'k';
373
- case GPT_SAMPLER_TYPE_TFS_Z: return 'f';
374
- case GPT_SAMPLER_TYPE_TYPICAL_P: return 'y';
375
- case GPT_SAMPLER_TYPE_TOP_P: return 'p';
376
- case GPT_SAMPLER_TYPE_MIN_P: return 'm';
377
- case GPT_SAMPLER_TYPE_TEMPERATURE: return 't';
378
- case GPT_SAMPLER_TYPE_XTC: return 'x';
371
+ case COMMON_SAMPLER_TYPE_DRY: return 'd';
372
+ case COMMON_SAMPLER_TYPE_TOP_K: return 'k';
373
+ case COMMON_SAMPLER_TYPE_TYPICAL_P: return 'y';
374
+ case COMMON_SAMPLER_TYPE_TOP_P: return 'p';
375
+ case COMMON_SAMPLER_TYPE_MIN_P: return 'm';
376
+ case COMMON_SAMPLER_TYPE_TEMPERATURE: return 't';
377
+ case COMMON_SAMPLER_TYPE_XTC: return 'x';
378
+ case COMMON_SAMPLER_TYPE_INFILL: return 'i';
379
379
  default : return '?';
380
380
  }
381
381
  }
382
382
 
383
- std::string gpt_sampler_type_to_str(enum gpt_sampler_type cnstr) {
383
+ std::string common_sampler_type_to_str(enum common_sampler_type cnstr) {
384
384
  switch (cnstr) {
385
- case GPT_SAMPLER_TYPE_TOP_K: return "top_k";
386
- case GPT_SAMPLER_TYPE_TFS_Z: return "tfs_z";
387
- case GPT_SAMPLER_TYPE_TYPICAL_P: return "typ_p";
388
- case GPT_SAMPLER_TYPE_TOP_P: return "top_p";
389
- case GPT_SAMPLER_TYPE_MIN_P: return "min_p";
390
- case GPT_SAMPLER_TYPE_XTC: return "xtc";
391
- case GPT_SAMPLER_TYPE_TEMPERATURE: return "temperature";
385
+ case COMMON_SAMPLER_TYPE_DRY: return "dry";
386
+ case COMMON_SAMPLER_TYPE_TOP_K: return "top_k";
387
+ case COMMON_SAMPLER_TYPE_TYPICAL_P: return "typ_p";
388
+ case COMMON_SAMPLER_TYPE_TOP_P: return "top_p";
389
+ case COMMON_SAMPLER_TYPE_MIN_P: return "min_p";
390
+ case COMMON_SAMPLER_TYPE_TEMPERATURE: return "temperature";
391
+ case COMMON_SAMPLER_TYPE_XTC: return "xtc";
392
+ case COMMON_SAMPLER_TYPE_INFILL: return "infill";
392
393
  default : return "";
393
394
  }
394
395
  }
395
396
 
396
- std::vector<gpt_sampler_type> gpt_sampler_types_from_names(const std::vector<std::string> & names, bool allow_alt_names) {
397
- std::unordered_map<std::string, gpt_sampler_type> sampler_canonical_name_map {
398
- { "top_k", GPT_SAMPLER_TYPE_TOP_K },
399
- { "top_p", GPT_SAMPLER_TYPE_TOP_P },
400
- { "typ_p", GPT_SAMPLER_TYPE_TYPICAL_P },
401
- { "min_p", GPT_SAMPLER_TYPE_MIN_P },
402
- { "tfs_z", GPT_SAMPLER_TYPE_TFS_Z },
403
- { "xtc", GPT_SAMPLER_TYPE_XTC},
404
- { "temperature", GPT_SAMPLER_TYPE_TEMPERATURE },
397
+ std::vector<common_sampler_type> common_sampler_types_from_names(const std::vector<std::string> & names, bool allow_alt_names) {
398
+ std::unordered_map<std::string, common_sampler_type> sampler_canonical_name_map {
399
+ { "dry", COMMON_SAMPLER_TYPE_DRY },
400
+ { "top_k", COMMON_SAMPLER_TYPE_TOP_K },
401
+ { "top_p", COMMON_SAMPLER_TYPE_TOP_P },
402
+ { "typ_p", COMMON_SAMPLER_TYPE_TYPICAL_P },
403
+ { "min_p", COMMON_SAMPLER_TYPE_MIN_P },
404
+ { "temperature", COMMON_SAMPLER_TYPE_TEMPERATURE },
405
+ { "xtc", COMMON_SAMPLER_TYPE_XTC },
406
+ { "infill", COMMON_SAMPLER_TYPE_INFILL },
405
407
  };
406
408
 
407
409
  // since samplers names are written multiple ways
408
410
  // make it ready for both system names and input names
409
- std::unordered_map<std::string, gpt_sampler_type> sampler_alt_name_map {
410
- { "top-k", GPT_SAMPLER_TYPE_TOP_K },
411
- { "top-p", GPT_SAMPLER_TYPE_TOP_P },
412
- { "nucleus", GPT_SAMPLER_TYPE_TOP_P },
413
- { "typical-p", GPT_SAMPLER_TYPE_TYPICAL_P },
414
- { "typical", GPT_SAMPLER_TYPE_TYPICAL_P },
415
- { "typ-p", GPT_SAMPLER_TYPE_TYPICAL_P },
416
- { "typ", GPT_SAMPLER_TYPE_TYPICAL_P },
417
- { "min-p", GPT_SAMPLER_TYPE_MIN_P },
418
- { "tfs-z", GPT_SAMPLER_TYPE_TFS_Z },
419
- { "tfs", GPT_SAMPLER_TYPE_TFS_Z },
420
- { "xtc_p", GPT_SAMPLER_TYPE_XTC},
421
- { "xtc_t", GPT_SAMPLER_TYPE_XTC},
422
- { "temp", GPT_SAMPLER_TYPE_TEMPERATURE },
411
+ std::unordered_map<std::string, common_sampler_type> sampler_alt_name_map {
412
+ { "top-k", COMMON_SAMPLER_TYPE_TOP_K },
413
+ { "top-p", COMMON_SAMPLER_TYPE_TOP_P },
414
+ { "nucleus", COMMON_SAMPLER_TYPE_TOP_P },
415
+ { "typical-p", COMMON_SAMPLER_TYPE_TYPICAL_P },
416
+ { "typical", COMMON_SAMPLER_TYPE_TYPICAL_P },
417
+ { "typ-p", COMMON_SAMPLER_TYPE_TYPICAL_P },
418
+ { "typ", COMMON_SAMPLER_TYPE_TYPICAL_P },
419
+ { "min-p", COMMON_SAMPLER_TYPE_MIN_P },
420
+ { "temp", COMMON_SAMPLER_TYPE_TEMPERATURE },
423
421
  };
424
422
 
425
- std::vector<gpt_sampler_type> samplers;
423
+ std::vector<common_sampler_type> samplers;
426
424
  samplers.reserve(names.size());
427
425
 
428
426
  for (const auto & name : names) {
@@ -442,18 +440,19 @@ std::vector<gpt_sampler_type> gpt_sampler_types_from_names(const std::vector<std
442
440
  return samplers;
443
441
  }
444
442
 
445
- std::vector<gpt_sampler_type> gpt_sampler_types_from_chars(const std::string & chars) {
446
- std::unordered_map<char, gpt_sampler_type> sampler_name_map = {
447
- { gpt_sampler_type_to_chr(GPT_SAMPLER_TYPE_TOP_K), GPT_SAMPLER_TYPE_TOP_K },
448
- { gpt_sampler_type_to_chr(GPT_SAMPLER_TYPE_TFS_Z), GPT_SAMPLER_TYPE_TFS_Z },
449
- { gpt_sampler_type_to_chr(GPT_SAMPLER_TYPE_TYPICAL_P), GPT_SAMPLER_TYPE_TYPICAL_P },
450
- { gpt_sampler_type_to_chr(GPT_SAMPLER_TYPE_TOP_P), GPT_SAMPLER_TYPE_TOP_P },
451
- { gpt_sampler_type_to_chr(GPT_SAMPLER_TYPE_MIN_P), GPT_SAMPLER_TYPE_MIN_P },
452
- { gpt_sampler_type_to_chr(GPT_SAMPLER_TYPE_XTC), GPT_SAMPLER_TYPE_XTC},
453
- { gpt_sampler_type_to_chr(GPT_SAMPLER_TYPE_TEMPERATURE), GPT_SAMPLER_TYPE_TEMPERATURE }
443
+ std::vector<common_sampler_type> common_sampler_types_from_chars(const std::string & chars) {
444
+ std::unordered_map<char, common_sampler_type> sampler_name_map = {
445
+ { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_DRY), COMMON_SAMPLER_TYPE_DRY },
446
+ { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TOP_K), COMMON_SAMPLER_TYPE_TOP_K },
447
+ { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TYPICAL_P), COMMON_SAMPLER_TYPE_TYPICAL_P },
448
+ { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TOP_P), COMMON_SAMPLER_TYPE_TOP_P },
449
+ { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_MIN_P), COMMON_SAMPLER_TYPE_MIN_P },
450
+ { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TEMPERATURE), COMMON_SAMPLER_TYPE_TEMPERATURE },
451
+ { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_XTC), COMMON_SAMPLER_TYPE_XTC },
452
+ { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_INFILL), COMMON_SAMPLER_TYPE_INFILL },
454
453
  };
455
454
 
456
- std::vector<gpt_sampler_type> samplers;
455
+ std::vector<common_sampler_type> samplers;
457
456
  samplers.reserve(chars.size());
458
457
 
459
458
  for (const auto & c : chars) {
package/cpp/sampling.h CHANGED
@@ -7,7 +7,7 @@
7
7
  #include <string>
8
8
  #include <vector>
9
9
 
10
- // gpt_sampler extends llama_sampler with additional functionality:
10
+ // common_sampler extends llama_sampler with additional functionality:
11
11
  //
12
12
  // - grammar support
13
13
  // - custom sampler logic based on the parameters
@@ -23,30 +23,30 @@
23
23
  // token in order to verify if it fits the grammar. And only if the token doesn't fit the grammar, the
24
24
  // grammar constraints are applied to the full vocabulary and the token is resampled.
25
25
  //
26
- // The gpt_sampler also maintains a container with the last accepted tokens. In the future, this can
26
+ // The common_sampler also maintains a container with the last accepted tokens. In the future, this can
27
27
  // be moved into the core llama library.
28
28
  //
29
- // For convenience, the gpt_sampler also maintains a container with the current candidate tokens.
29
+ // For convenience, the common_sampler also maintains a container with the current candidate tokens.
30
30
  // This can be used to access the probabilities of the rest of the non-sampled tokens.
31
31
  //
32
32
  // TODO: measure grammar performance
33
33
  //
34
34
 
35
- struct gpt_sampler;
35
+ struct common_sampler;
36
36
 
37
37
  // llama_sampler API overloads
38
38
 
39
- struct gpt_sampler * gpt_sampler_init(const struct llama_model * model, const struct gpt_sampler_params & params);
39
+ struct common_sampler * common_sampler_init(const struct llama_model * model, const struct common_sampler_params & params);
40
40
 
41
- void gpt_sampler_free(struct gpt_sampler * gsmpl);
41
+ void common_sampler_free(struct common_sampler * gsmpl);
42
42
 
43
43
  // if accept_grammar is true, the token is accepted both by the sampling chain and the grammar
44
- void gpt_sampler_accept(struct gpt_sampler * gsmpl, llama_token token, bool accept_grammar);
45
- void gpt_sampler_reset (struct gpt_sampler * gsmpl);
46
- struct gpt_sampler * gpt_sampler_clone (struct gpt_sampler * gsmpl);
44
+ void common_sampler_accept(struct common_sampler * gsmpl, llama_token token, bool accept_grammar);
45
+ void common_sampler_reset (struct common_sampler * gsmpl);
46
+ struct common_sampler * common_sampler_clone (struct common_sampler * gsmpl);
47
47
 
48
48
  // arguments can be nullptr to skip printing
49
- void gpt_perf_print(const struct llama_context * ctx, const struct gpt_sampler * gsmpl);
49
+ void common_perf_print(const struct llama_context * ctx, const struct common_sampler * gsmpl);
50
50
 
51
51
  // extended sampling implementation:
52
52
  //
@@ -58,26 +58,26 @@ void gpt_perf_print(const struct llama_context * ctx, const struct gpt_sampler *
58
58
  // if grammar_first is true, the grammar is applied before the samplers (slower)
59
59
  // useful in cases where all the resulting candidates (not just the sampled one) must fit the grammar
60
60
  //
61
- llama_token gpt_sampler_sample(struct gpt_sampler * gsmpl, struct llama_context * ctx, int idx, bool grammar_first = false);
61
+ llama_token common_sampler_sample(struct common_sampler * gsmpl, struct llama_context * ctx, int idx, bool grammar_first = false);
62
62
 
63
- uint32_t gpt_sampler_get_seed(const struct gpt_sampler * gsmpl);
63
+ uint32_t common_sampler_get_seed(const struct common_sampler * gsmpl);
64
64
 
65
65
  // helpers
66
66
 
67
67
  // access the internal list of current candidate tokens
68
- llama_token_data_array * gpt_sampler_get_candidates(struct gpt_sampler * gsmpl);
68
+ llama_token_data_array * common_sampler_get_candidates(struct common_sampler * gsmpl);
69
69
 
70
70
  // get the last accepted token
71
- llama_token gpt_sampler_last(const struct gpt_sampler * gsmpl);
71
+ llama_token common_sampler_last(const struct common_sampler * gsmpl);
72
72
 
73
73
  // print the sampler chain into a string
74
- std::string gpt_sampler_print(const struct gpt_sampler * gsmpl);
74
+ std::string common_sampler_print(const struct common_sampler * gsmpl);
75
75
 
76
76
  // get a string representation of the last accepted tokens
77
- std::string gpt_sampler_prev_str(gpt_sampler * gsmpl, llama_context * ctx, int n);
77
+ std::string common_sampler_prev_str(common_sampler * gsmpl, llama_context * ctx, int n);
78
78
 
79
- char gpt_sampler_type_to_chr(enum gpt_sampler_type cnstr);
80
- std::string gpt_sampler_type_to_str(enum gpt_sampler_type cnstr);
79
+ char common_sampler_type_to_chr(enum common_sampler_type cnstr);
80
+ std::string common_sampler_type_to_str(enum common_sampler_type cnstr);
81
81
 
82
- std::vector<enum gpt_sampler_type> gpt_sampler_types_from_names(const std::vector<std::string> & names, bool allow_alt_names);
83
- std::vector<enum gpt_sampler_type> gpt_sampler_types_from_chars(const std::string & chars);
82
+ std::vector<enum common_sampler_type> common_sampler_types_from_names(const std::vector<std::string> & names, bool allow_alt_names);
83
+ std::vector<enum common_sampler_type> common_sampler_types_from_chars(const std::string & chars);
package/cpp/sgemm.cpp CHANGED
@@ -942,6 +942,36 @@ class tinyBLAS_Q0_AVX {
942
942
  return _mm_sub_epi8(_mm_and_si128(_mm_set1_epi8(15), _mm_srli_epi16(x, 4)), _mm_set1_epi8(8));
943
943
  }
944
944
 
945
+ inline __m256i load(const block_q5_0 *b) {
946
+ return _mm256_or_si256(denibble(b->qs), bittobyte(b->qh));
947
+ }
948
+
949
+ inline __m128i load0(const block_q5_0* b) {
950
+ const __m128i x = _mm_loadu_si128((const __m128i *)(b->qs));
951
+ uint32_t x32;
952
+ memcpy(&x32, b->qh, sizeof(uint32_t));
953
+ __m128i qxl = _mm_and_si128(_mm_set1_epi8(15), x);
954
+ __m128i bytesl = _mm_cmpeq_epi8(_mm_set1_epi64x(-1),
955
+ _mm_or_si128(_mm_set1_epi64x(0x7fbfdfeff7fbfdfe),
956
+ _mm_shuffle_epi8(_mm_set1_epi32(x32),
957
+ _mm_set_epi64x(0x0101010101010101, 0x0000000000000000))));
958
+ bytesl = _mm_andnot_si128(bytesl, _mm_set1_epi8((char)0xF0));
959
+ return _mm_or_si128(qxl, bytesl);
960
+ }
961
+
962
+ inline __m128i load1(const block_q5_0* b) {
963
+ const __m128i x = _mm_loadu_si128((const __m128i *)(b->qs));
964
+ uint32_t x32;
965
+ memcpy(&x32, b->qh, sizeof(uint32_t));
966
+ __m128i qxh = _mm_and_si128(_mm_set1_epi8(15), _mm_srli_epi16(x, 4));
967
+ __m128i bytesh = _mm_cmpeq_epi8(_mm_set1_epi64x(-1),
968
+ _mm_or_si128(_mm_set1_epi64x(0x7fbfdfeff7fbfdfe),
969
+ _mm_shuffle_epi8(_mm_set1_epi32(x32),
970
+ _mm_set_epi64x(0x0303030303030303, 0x0202020202020202))));
971
+ bytesh = _mm_andnot_si128(bytesh, _mm_set1_epi8((char)0xF0));
972
+ return _mm_or_si128(qxh, bytesh);
973
+ }
974
+
945
975
  inline __m256i load(const block_iq4_nl *b) {
946
976
  return MM256_SET_M128I(load1(b), load0(b));
947
977
  }
@@ -973,6 +1003,17 @@ class tinyBLAS_Q0_AVX {
973
1003
  _mm_srli_epi16(x, 4), 1));
974
1004
  }
975
1005
 
1006
+ static inline __m256i bittobyte(const uint8_t *p) {
1007
+ uint32_t x32;
1008
+ memcpy(&x32, p, sizeof(uint32_t));
1009
+ __m256i bytes = _mm256_cmpeq_epi8(_mm256_set1_epi64x(-1),
1010
+ _mm256_or_si256(_mm256_set1_epi64x(0x7fbfdfeff7fbfdfe),
1011
+ _mm256_shuffle_epi8(_mm256_set1_epi32(x32),
1012
+ _mm256_set_epi64x(0x0303030303030303, 0x0202020202020202,
1013
+ 0x0101010101010101, 0x0000000000000000))));
1014
+ return _mm256_andnot_si256(bytes, _mm256_set1_epi8((char)0xF0));
1015
+ }
1016
+
976
1017
  const TA *const A;
977
1018
  const TB *const B;
978
1019
  TC *const C;
@@ -1182,6 +1223,22 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
1182
1223
  #endif
1183
1224
  }
1184
1225
 
1226
+ case LM_GGML_TYPE_Q5_0: {
1227
+ if (Btype != LM_GGML_TYPE_Q8_0)
1228
+ return false;
1229
+ #if defined(__AVX2__) || defined(__AVX512F__) || defined(__AVX__)
1230
+ tinyBLAS_Q0_AVX<block_q5_0, block_q8_0, float> tb{
1231
+ k, (const block_q5_0 *)A, lda,
1232
+ (const block_q8_0 *)B, ldb,
1233
+ (float *)C, ldc,
1234
+ ith, nth};
1235
+ tb.matmul(m, n);
1236
+ return true;
1237
+ #else
1238
+ return false;
1239
+ #endif
1240
+ }
1241
+
1185
1242
  case LM_GGML_TYPE_IQ4_NL: {
1186
1243
  if (Btype != LM_GGML_TYPE_Q8_0)
1187
1244
  return false;
@@ -1 +1 @@
1
- {"version":3,"names":["_reactNative","require","_default","TurboModuleRegistry","get","exports","default"],"sourceRoot":"..\\..\\src","sources":["NativeRNLlama.ts"],"mappings":";;;;;;AACA,IAAAA,YAAA,GAAAC,OAAA;AAAkD,IAAAC,QAAA,GAsKnCC,gCAAmB,CAACC,GAAG,CAAO,SAAS,CAAC;AAAAC,OAAA,CAAAC,OAAA,GAAAJ,QAAA"}
1
+ {"version":3,"names":["_reactNative","require","_default","TurboModuleRegistry","get","exports","default"],"sourceRoot":"..\\..\\src","sources":["NativeRNLlama.ts"],"mappings":";;;;;;AACA,IAAAA,YAAA,GAAAC,OAAA;AAAkD,IAAAC,QAAA,GAqKnCC,gCAAmB,CAACC,GAAG,CAAO,SAAS,CAAC;AAAAC,OAAA,CAAAC,OAAA,GAAAJ,QAAA"}
@@ -1 +1 @@
1
- {"version":3,"names":["TurboModuleRegistry","get"],"sourceRoot":"..\\..\\src","sources":["NativeRNLlama.ts"],"mappings":"AACA,SAASA,mBAAmB,QAAQ,cAAc;AAsKlD,eAAeA,mBAAmB,CAACC,GAAG,CAAO,SAAS,CAAC"}
1
+ {"version":3,"names":["TurboModuleRegistry","get"],"sourceRoot":"..\\..\\src","sources":["NativeRNLlama.ts"],"mappings":"AACA,SAASA,mBAAmB,QAAQ,cAAc;AAqKlD,eAAeA,mBAAmB,CAACC,GAAG,CAAO,SAAS,CAAC"}
@@ -27,7 +27,6 @@ export type NativeCompletionParams = {
27
27
  min_p?: number;
28
28
  xtc_t?: number;
29
29
  xtc_p?: number;
30
- tfs_z?: number;
31
30
  typical_p?: number;
32
31
  temperature?: number;
33
32
  penalty_last_n?: number;
@@ -1 +1 @@
1
- {"version":3,"file":"NativeRNLlama.d.ts","sourceRoot":"","sources":["../../src/NativeRNLlama.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,cAAc,CAAA;AAG/C,MAAM,MAAM,mBAAmB,GAAG;IAChC,KAAK,EAAE,MAAM,CAAA;IACb,cAAc,CAAC,EAAE,OAAO,CAAA;IAExB,SAAS,CAAC,EAAE,OAAO,CAAA;IAEnB,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,OAAO,CAAC,EAAE,MAAM,CAAA;IAEhB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,YAAY,CAAC,EAAE,MAAM,CAAA;IAErB,SAAS,CAAC,EAAE,OAAO,CAAA;IACnB,QAAQ,CAAC,EAAE,OAAO,CAAA;IAClB,UAAU,CAAC,EAAE,OAAO,CAAA;IAEpB,IAAI,CAAC,EAAE,MAAM,CAAA;IACb,WAAW,CAAC,EAAE,MAAM,CAAA;IAEpB,cAAc,CAAC,EAAE,MAAM,CAAA;IACvB,eAAe,CAAC,EAAE,MAAM,CAAA;CACzB,CAAA;AAED,MAAM,MAAM,sBAAsB,GAAG;IACnC,MAAM,EAAE,MAAM,CAAA;IACd,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,IAAI,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAA;IAEpB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,cAAc,CAAC,EAAE,MAAM,CAAA;IACvB,cAAc,CAAC,EAAE,MAAM,CAAA;IACvB,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB,eAAe,CAAC,EAAE,MAAM,CAAA;IACxB,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB,WAAW,CAAC,EAAE,OAAO,CAAA;IACrB,IAAI,CAAC,EAAE,MAAM,CAAA;IAEb,UAAU,CAAC,EAAE,OAAO,CAAA;IACpB,UAAU,CAAC,EAAE,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAA;IAEjC,uBAAuB,EAAE,OAAO,CAAA;CACjC,CAAA;AAED,MAAM,MAAM,6BAA6B,GAAG;IAC1C,OAAO,EAAE,MAAM,CAAA;IACf,IAAI,EAAE,MAAM,CAAA;CACb,CAAA;AAED,MAAM,MAAM,yBAAyB,GAAG;IACtC,OAAO,EAAE,MAAM,CAAA;IACf,KAAK,EAAE,KAAK,CAAC,6BAA6B,CAAC,CAAA;CAC5C,CAAA;AAED,MAAM,MAAM,6BAA6B,GAAG;IAC1C,QAAQ,EAAE,MAAM,CAAA;IAChB,SAAS,EAAE,MAAM,CAAA;IACjB,mBAAmB,EAAE,MAAM,CAAA;IAC3B,iBAAiB,EAAE,MAAM,CAAA;IACzB,WAAW,EAAE,MAAM,CAAA;IACnB,YAAY,EAAE,MAAM,CAAA;IACpB,sBAAsB,EAAE,MAAM,CAAA;IAC9B,oBAAoB,EAAE,MAAM,CAAA;CAC7B,CAAA;AAED,MAAM,MAAM,sBAAsB,GAAG;IACnC,IAAI,EAAE,MAAM,CAAA;IAEZ,gBAAgB,EAAE,MAAM,CAAA;IACxB,gBAAgB,EAAE,MAAM,CAAA;IACxB,SAAS,EAAE,OAAO,CAAA;IAClB,WAAW,EAAE,OAAO,CAAA;IACpB,YAAY,EAAE,MAAM,CAAA;IACpB,aAAa,EAAE,MAAM,CAAA;IACrB,aAAa,EAAE,MAAM,CAAA;IACrB,aAAa,EAAE,MAAM,CAAA;IACrB,OAAO,EAAE,6BAA6B,CAAA;IAEtC,wBAAwB,CAAC,EAAE,KAAK,CAAC,yBAAyB,CAAC,CAAA;CAC5D,CAAA;AAED,MAAM,MAAM,oBAAoB,GAAG;IACjC,MAAM,EAAE,KAAK,CAAC,MAAM,CAAC,CAAA;CACtB,CAAA;AAED,MAAM,MAAM,qBAAqB,GAAG;IAClC,SAAS,EAAE,KAAK,CAAC,MAAM,CAAC,CAAA;CACzB,CAAA;AAED,MAAM,MAAM,kBAAkB,GAAG;IAC/B,SAAS,EAAE,MAAM,CAAA;IACjB,GAAG,EAAE,OAAO,CAAA;IACZ,WAAW,EAAE,MAAM,CAAA;IACnB,KAAK,EAAE,MAAM,CAAA;CACd,CAAA;AAED,MAAM,MAAM,uBAAuB,GAAG;IACpC,aAAa,EAAE,MAAM,CAAA;IACrB,MAAM,EAAE,MAAM,CAAA;CACf,CAAA;AAED,MAAM,MAAM,sBAAsB,GAAG;IACnC,IAAI,EAAE,MAAM,CAAA;IACZ,OAAO,EAAE,MAAM,CAAA;CAChB,CAAA;AAED,MAAM,MAAM,iBAAiB,GAAG;IAC9B,KAAK,EAAE,OAAO,CAAA;IACd,IAAI,EAAE,OAAO,CAAA;IACb,OAAO,EAAE,OAAO,CAAA;CACjB,CAAA;AAED,MAAM,WAAW,IAAK,SAAQ,WAAW;IACvC,eAAe,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IAC7C,WAAW,CAAC,MAAM,EAAE,mBAAmB,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAAA;IAErE,WAAW,CACT,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,MAAM,GACf,OAAO,CAAC,uBAAuB,CAAC,CAAA;IACnC,WAAW,CACT,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,MAAM,EAChB,IAAI,EAAE,MAAM,GACX,OAAO,CAAC,MAAM,CAAC,CAAA;IAClB,UAAU,CACR,SAAS,EAAE,MAAM,EACjB,MAAM,EAAE,sBAAsB,GAC7B,OAAO,CAAC,sBAAsB,CAAC,CAAA;IAClC,cAAc,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IAChD,aAAa,CAAC,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,oBAAoB,CAAC,CAAA;IAC7E,YAAY,CAAC,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,oBAAoB,CAAA;IACnE,cAAc,IAAK,OAAO,CAAC,iBAAiB,CAAC,CAAA;IAC7C,gBAAgB,CACd,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,sBAAsB,EAAE,EAClC,YAAY,CAAC,EAAE,MAAM,GACpB,OAAO,CAAC,MAAM,CAAC,CAAA;IAClB,UAAU,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,CAAA;IAChE,SAAS,CAAC,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,qBAAqB,CAAC,CAAA;IAC1E,KAAK,CACH,SAAS,EAAE,MAAM,EACjB,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,GACT,OAAO,CAAC,MAAM,CAAC,CAAA;IAElB,cAAc,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IAEhD,kBAAkB,IAAI,OAAO,CAAC,IAAI,CAAC,CAAA;CACpC;;AAED,wBAA+D"}
1
+ {"version":3,"file":"NativeRNLlama.d.ts","sourceRoot":"","sources":["../../src/NativeRNLlama.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,cAAc,CAAA;AAG/C,MAAM,MAAM,mBAAmB,GAAG;IAChC,KAAK,EAAE,MAAM,CAAA;IACb,cAAc,CAAC,EAAE,OAAO,CAAA;IAExB,SAAS,CAAC,EAAE,OAAO,CAAA;IAEnB,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,OAAO,CAAC,EAAE,MAAM,CAAA;IAEhB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,YAAY,CAAC,EAAE,MAAM,CAAA;IAErB,SAAS,CAAC,EAAE,OAAO,CAAA;IACnB,QAAQ,CAAC,EAAE,OAAO,CAAA;IAClB,UAAU,CAAC,EAAE,OAAO,CAAA;IAEpB,IAAI,CAAC,EAAE,MAAM,CAAA;IACb,WAAW,CAAC,EAAE,MAAM,CAAA;IAEpB,cAAc,CAAC,EAAE,MAAM,CAAA;IACvB,eAAe,CAAC,EAAE,MAAM,CAAA;CACzB,CAAA;AAED,MAAM,MAAM,sBAAsB,GAAG;IACnC,MAAM,EAAE,MAAM,CAAA;IACd,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,IAAI,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAA;IAEpB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,cAAc,CAAC,EAAE,MAAM,CAAA;IACvB,cAAc,CAAC,EAAE,MAAM,CAAA;IACvB,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB,eAAe,CAAC,EAAE,MAAM,CAAA;IACxB,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB,WAAW,CAAC,EAAE,OAAO,CAAA;IACrB,IAAI,CAAC,EAAE,MAAM,CAAA;IAEb,UAAU,CAAC,EAAE,OAAO,CAAA;IACpB,UAAU,CAAC,EAAE,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAA;IAEjC,uBAAuB,EAAE,OAAO,CAAA;CACjC,CAAA;AAED,MAAM,MAAM,6BAA6B,GAAG;IAC1C,OAAO,EAAE,MAAM,CAAA;IACf,IAAI,EAAE,MAAM,CAAA;CACb,CAAA;AAED,MAAM,MAAM,yBAAyB,GAAG;IACtC,OAAO,EAAE,MAAM,CAAA;IACf,KAAK,EAAE,KAAK,CAAC,6BAA6B,CAAC,CAAA;CAC5C,CAAA;AAED,MAAM,MAAM,6BAA6B,GAAG;IAC1C,QAAQ,EAAE,MAAM,CAAA;IAChB,SAAS,EAAE,MAAM,CAAA;IACjB,mBAAmB,EAAE,MAAM,CAAA;IAC3B,iBAAiB,EAAE,MAAM,CAAA;IACzB,WAAW,EAAE,MAAM,CAAA;IACnB,YAAY,EAAE,MAAM,CAAA;IACpB,sBAAsB,EAAE,MAAM,CAAA;IAC9B,oBAAoB,EAAE,MAAM,CAAA;CAC7B,CAAA;AAED,MAAM,MAAM,sBAAsB,GAAG;IACnC,IAAI,EAAE,MAAM,CAAA;IAEZ,gBAAgB,EAAE,MAAM,CAAA;IACxB,gBAAgB,EAAE,MAAM,CAAA;IACxB,SAAS,EAAE,OAAO,CAAA;IAClB,WAAW,EAAE,OAAO,CAAA;IACpB,YAAY,EAAE,MAAM,CAAA;IACpB,aAAa,EAAE,MAAM,CAAA;IACrB,aAAa,EAAE,MAAM,CAAA;IACrB,aAAa,EAAE,MAAM,CAAA;IACrB,OAAO,EAAE,6BAA6B,CAAA;IAEtC,wBAAwB,CAAC,EAAE,KAAK,CAAC,yBAAyB,CAAC,CAAA;CAC5D,CAAA;AAED,MAAM,MAAM,oBAAoB,GAAG;IACjC,MAAM,EAAE,KAAK,CAAC,MAAM,CAAC,CAAA;CACtB,CAAA;AAED,MAAM,MAAM,qBAAqB,GAAG;IAClC,SAAS,EAAE,KAAK,CAAC,MAAM,CAAC,CAAA;CACzB,CAAA;AAED,MAAM,MAAM,kBAAkB,GAAG;IAC/B,SAAS,EAAE,MAAM,CAAA;IACjB,GAAG,EAAE,OAAO,CAAA;IACZ,WAAW,EAAE,MAAM,CAAA;IACnB,KAAK,EAAE,MAAM,CAAA;CACd,CAAA;AAED,MAAM,MAAM,uBAAuB,GAAG;IACpC,aAAa,EAAE,MAAM,CAAA;IACrB,MAAM,EAAE,MAAM,CAAA;CACf,CAAA;AAED,MAAM,MAAM,sBAAsB,GAAG;IACnC,IAAI,EAAE,MAAM,CAAA;IACZ,OAAO,EAAE,MAAM,CAAA;CAChB,CAAA;AAED,MAAM,MAAM,iBAAiB,GAAG;IAC9B,KAAK,EAAE,OAAO,CAAA;IACd,IAAI,EAAE,OAAO,CAAA;IACb,OAAO,EAAE,OAAO,CAAA;CACjB,CAAA;AAED,MAAM,WAAW,IAAK,SAAQ,WAAW;IACvC,eAAe,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IAC7C,WAAW,CAAC,MAAM,EAAE,mBAAmB,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAAA;IAErE,WAAW,CACT,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,MAAM,GACf,OAAO,CAAC,uBAAuB,CAAC,CAAA;IACnC,WAAW,CACT,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,MAAM,EAChB,IAAI,EAAE,MAAM,GACX,OAAO,CAAC,MAAM,CAAC,CAAA;IAClB,UAAU,CACR,SAAS,EAAE,MAAM,EACjB,MAAM,EAAE,sBAAsB,GAC7B,OAAO,CAAC,sBAAsB,CAAC,CAAA;IAClC,cAAc,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IAChD,aAAa,CAAC,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,oBAAoB,CAAC,CAAA;IAC7E,YAAY,CAAC,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,oBAAoB,CAAA;IACnE,cAAc,IAAK,OAAO,CAAC,iBAAiB,CAAC,CAAA;IAC7C,gBAAgB,CACd,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,sBAAsB,EAAE,EAClC,YAAY,CAAC,EAAE,MAAM,GACpB,OAAO,CAAC,MAAM,CAAC,CAAA;IAClB,UAAU,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,CAAA;IAChE,SAAS,CAAC,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,qBAAqB,CAAC,CAAA;IAC1E,KAAK,CACH,SAAS,EAAE,MAAM,EACjB,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,GACT,OAAO,CAAC,MAAM,CAAC,CAAA;IAElB,cAAc,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IAEhD,kBAAkB,IAAI,OAAO,CAAC,IAAI,CAAC,CAAA;CACpC;;AAED,wBAA+D"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "cui-llama.rn",
3
- "version": "1.2.3",
3
+ "version": "1.2.6",
4
4
  "description": "Fork of llama.rn for ChatterUI",
5
5
  "main": "lib/commonjs/index",
6
6
  "module": "lib/module/index",
@@ -37,7 +37,6 @@ export type NativeCompletionParams = {
37
37
  min_p?: number
38
38
  xtc_t?: number
39
39
  xtc_p?: number
40
- tfs_z?: number
41
40
  typical_p?: number
42
41
  temperature?: number // -> temp
43
42
  penalty_last_n?: number