cui-llama.rn 1.2.3 → 1.2.4

This diff shows the contents of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
package/cpp/sampling.cpp CHANGED
@@ -98,8 +98,8 @@ struct ring_buffer {
     std::vector<T> data;
 };
 
-struct gpt_sampler {
-    gpt_sampler_params params;
+struct common_sampler {
+    common_sampler_params params;
 
     struct llama_sampler * grmr;
     struct llama_sampler * chain;
@@ -125,26 +125,26 @@ struct gpt_sampler {
     }
 };
 
-std::string gpt_sampler_params::print() const {
+std::string common_sampler_params::print() const {
     char result[1024];
 
     snprintf(result, sizeof(result),
             "\trepeat_last_n = %d, repeat_penalty = %.3f, frequency_penalty = %.3f, presence_penalty = %.3f\n"
-            "\ttop_k = %d, tfs_z = %.3f, top_p = %.3f, min_p = %.3f, typical_p = %.3f, temp = %.3f\n"
+            "\ttop_k = %d, tfs_z = %.3f, top_p = %.3f, min_p = %.3f, xtc_probability = %.3f, xtc_threshold = %.3f, typical_p = %.3f, temp = %.3f\n"
             "\tmirostat = %d, mirostat_lr = %.3f, mirostat_ent = %.3f",
             penalty_last_n, penalty_repeat, penalty_freq, penalty_present,
-            top_k, tfs_z, top_p, min_p, typ_p, temp,
+            top_k, tfs_z, top_p, min_p, xtc_probability, xtc_threshold, typ_p, temp,
             mirostat, mirostat_eta, mirostat_tau);
 
     return std::string(result);
 }
 
-struct gpt_sampler * gpt_sampler_init(const struct llama_model * model, const struct gpt_sampler_params & params) {
+struct common_sampler * common_sampler_init(const struct llama_model * model, const struct common_sampler_params & params) {
     llama_sampler_chain_params lparams = llama_sampler_chain_default_params();
 
     lparams.no_perf = params.no_perf;
 
-    auto * result = new gpt_sampler {
+    auto * result = new common_sampler {
         /* .params = */ params,
         /* .grmr = */ llama_sampler_init_grammar(model, params.grammar.c_str(), "root"),
         /* .chain = */ llama_sampler_chain_init(lparams),
@@ -170,64 +170,53 @@ struct gpt_sampler * gpt_sampler_init(const struct llama_model * model, const st
                 params.penalty_present,
                 params.penalize_nl,
                 params.ignore_eos));
-
-    if (params.temp > 0.0f) {
-        if (params.mirostat == 0) {
-            for (const auto & cnstr : params.samplers) {
-                switch (cnstr) {
-                    case GPT_SAMPLER_TYPE_TOP_K:
-                        llama_sampler_chain_add(result->chain, llama_sampler_init_top_k (params.top_k));
-                        break;
-                    case GPT_SAMPLER_TYPE_TOP_P:
-                        llama_sampler_chain_add(result->chain, llama_sampler_init_top_p (params.top_p, params.min_keep));
-                        break;
-                    case GPT_SAMPLER_TYPE_MIN_P:
-                        llama_sampler_chain_add(result->chain, llama_sampler_init_min_p (params.min_p, params.min_keep));
-                        break;
-                    case GPT_SAMPLER_TYPE_TFS_Z:
-                        llama_sampler_chain_add(result->chain, llama_sampler_init_tail_free(params.tfs_z, params.min_keep));
-                        break;
-                    case GPT_SAMPLER_TYPE_TYPICAL_P:
-                        llama_sampler_chain_add(result->chain, llama_sampler_init_typical (params.typ_p, params.min_keep));
-                        break;
-                    case GPT_SAMPLER_TYPE_XTC:
-                        llama_sampler_chain_add(result->chain, llama_sampler_init_xtc (params.xtc_p, params.xtc_t, params.min_keep, params.seed));
-                        break;
-                    case GPT_SAMPLER_TYPE_TEMPERATURE:
-                        llama_sampler_chain_add(result->chain, llama_sampler_init_temp_ext (params.temp, params.dynatemp_range, params.dynatemp_exponent));
-                        break;
-                    default:
-                        LM_GGML_ASSERT(false && "unknown sampler type");
-                }
+
+    if (params.mirostat == 0) {
+        for (const auto & cnstr : params.samplers) {
+            switch (cnstr) {
+                case COMMON_SAMPLER_TYPE_TOP_K:
+                    llama_sampler_chain_add(result->chain, llama_sampler_init_top_k (params.top_k));
+                    break;
+                case COMMON_SAMPLER_TYPE_TOP_P:
+                    llama_sampler_chain_add(result->chain, llama_sampler_init_top_p (params.top_p, params.min_keep));
+                    break;
+                case COMMON_SAMPLER_TYPE_MIN_P:
+                    llama_sampler_chain_add(result->chain, llama_sampler_init_min_p (params.min_p, params.min_keep));
+                    break;
+                case COMMON_SAMPLER_TYPE_XTC:
+                    llama_sampler_chain_add(result->chain, llama_sampler_init_xtc (params.xtc_probability, params.xtc_threshold, params.min_keep, params.seed));
+                    break;
+                case COMMON_SAMPLER_TYPE_TFS_Z:
+                    llama_sampler_chain_add(result->chain, llama_sampler_init_tail_free(params.tfs_z, params.min_keep));
+                    break;
+                case COMMON_SAMPLER_TYPE_TYPICAL_P:
+                    llama_sampler_chain_add(result->chain, llama_sampler_init_typical (params.typ_p, params.min_keep));
+                    break;
+                case COMMON_SAMPLER_TYPE_TEMPERATURE:
+                    llama_sampler_chain_add(result->chain, llama_sampler_init_temp_ext (params.temp, params.dynatemp_range, params.dynatemp_exponent));
+                    break;
+                case COMMON_SAMPLER_TYPE_INFILL:
+                    llama_sampler_chain_add(result->chain, llama_sampler_init_infill (model));
+                    break;
+                default:
+                    LM_GGML_ASSERT(false && "unknown sampler type");
             }
-            llama_sampler_chain_add(result->chain, llama_sampler_init_softmax());
-            llama_sampler_chain_add(result->chain, llama_sampler_init_dist(params.seed));
-        } else if (params.mirostat == 1) {
-            llama_sampler_chain_add(result->chain, llama_sampler_init_temp(params.temp));
-            llama_sampler_chain_add(result->chain, llama_sampler_init_mirostat(llama_n_vocab(model), params.seed, params.mirostat_tau, params.mirostat_eta, 100));
-        } else if (params.mirostat == 2) {
-            llama_sampler_chain_add(result->chain, llama_sampler_init_temp(params.temp));
-            llama_sampler_chain_add(result->chain, llama_sampler_init_mirostat_v2(params.seed, params.mirostat_tau, params.mirostat_eta));
-        } else {
-            LM_GGML_ASSERT(false && "unknown mirostat version");
         }
+        llama_sampler_chain_add(result->chain, llama_sampler_init_dist(params.seed));
+    } else if (params.mirostat == 1) {
+        llama_sampler_chain_add(result->chain, llama_sampler_init_temp(params.temp));
+        llama_sampler_chain_add(result->chain, llama_sampler_init_mirostat(llama_n_vocab(model), params.seed, params.mirostat_tau, params.mirostat_eta, 100));
+    } else if (params.mirostat == 2) {
+        llama_sampler_chain_add(result->chain, llama_sampler_init_temp(params.temp));
+        llama_sampler_chain_add(result->chain, llama_sampler_init_mirostat_v2(params.seed, params.mirostat_tau, params.mirostat_eta));
     } else {
-        if (params.n_probs > 0) {
-            // some use cases require to sample greedily, but still obtain the probabilities of the top tokens
-            // ref: https://github.com/ggerganov/llama.cpp/pull/9605
-            //
-            // the following will not produce exactly the same probs as applyging softmax to the full vocabulary, but
-            // it is much faster, since we avoid sorting all tokens and should give a good approximation
-            llama_sampler_chain_add(result->chain, llama_sampler_init_top_k(params.n_probs));
-            llama_sampler_chain_add(result->chain, llama_sampler_init_softmax());
-        }
-        llama_sampler_chain_add(result->chain, llama_sampler_init_greedy());
+        LM_GGML_ASSERT(false && "unknown mirostat version");
     }
 
     return result;
 }
 
-void gpt_sampler_free(struct gpt_sampler * gsmpl) {
+void common_sampler_free(struct common_sampler * gsmpl) {
     if (gsmpl) {
         llama_sampler_free(gsmpl->grmr);
 
@@ -237,7 +226,7 @@ void gpt_sampler_free(struct gpt_sampler * gsmpl) {
     }
 }
 
-void gpt_sampler_accept(struct gpt_sampler * gsmpl, llama_token token, bool accept_grammar) {
+void common_sampler_accept(struct common_sampler * gsmpl, llama_token token, bool accept_grammar) {
     if (accept_grammar) {
         llama_sampler_accept(gsmpl->grmr, token);
     }
@@ -247,14 +236,14 @@ void gpt_sampler_accept(struct gpt_sampler * gsmpl, llama_token token, bool acce
     gsmpl->prev.push_back(token);
 }
 
-void gpt_sampler_reset(struct gpt_sampler * gsmpl) {
+void common_sampler_reset(struct common_sampler * gsmpl) {
     llama_sampler_reset(gsmpl->grmr);
 
     llama_sampler_reset(gsmpl->chain);
 }
 
-struct gpt_sampler * gpt_sampler_clone(gpt_sampler * gsmpl) {
-    return new gpt_sampler {
+struct common_sampler * common_sampler_clone(common_sampler * gsmpl) {
+    return new common_sampler {
         /* .params = */ gsmpl->params,
         /* .grmr = */ llama_sampler_clone(gsmpl->grmr),
         /* .chain = */ llama_sampler_clone(gsmpl->chain),
@@ -264,7 +253,7 @@ struct gpt_sampler * gpt_sampler_clone(gpt_sampler * gsmpl) {
     };
 }
 
-void gpt_perf_print(const struct llama_context * ctx, const struct gpt_sampler * gsmpl) {
+void common_perf_print(const struct llama_context * ctx, const struct common_sampler * gsmpl) {
     // TODO: measure grammar performance
 
     if (gsmpl) {
@@ -275,7 +264,7 @@ void gpt_perf_print(const struct llama_context * ctx, const struct gpt_sampler *
     }
 }
 
-llama_token gpt_sampler_sample(struct gpt_sampler * gsmpl, struct llama_context * ctx, int idx, bool grammar_first) {
+llama_token common_sampler_sample(struct common_sampler * gsmpl, struct llama_context * ctx, int idx, bool grammar_first) {
    gsmpl->set_logits(ctx, idx);
 
     auto & grmr = gsmpl->grmr;
@@ -321,21 +310,21 @@ llama_token gpt_sampler_sample(struct gpt_sampler * gsmpl, struct llama_context
     return cur_p.data[cur_p.selected].id;
 }
 
-uint32_t gpt_sampler_get_seed(const struct gpt_sampler * gsmpl) {
+uint32_t common_sampler_get_seed(const struct common_sampler * gsmpl) {
     return llama_sampler_get_seed(gsmpl->chain);
 }
 
 // helpers
 
-llama_token_data_array * gpt_sampler_get_candidates(struct gpt_sampler * gsmpl) {
+llama_token_data_array * common_sampler_get_candidates(struct common_sampler * gsmpl) {
     return &gsmpl->cur_p;
 }
 
-llama_token gpt_sampler_last(const struct gpt_sampler * gsmpl) {
+llama_token common_sampler_last(const struct common_sampler * gsmpl) {
     return gsmpl->prev.rat(0);
 }
 
-std::string gpt_sampler_print(const struct gpt_sampler * gsmpl) {
+std::string common_sampler_print(const struct common_sampler * gsmpl) {
     std::string result = "logits ";
 
     for (int i = 0; i < llama_sampler_chain_n(gsmpl->chain); i++) {
@@ -346,7 +335,7 @@ std::string gpt_sampler_print(const struct gpt_sampler * gsmpl) {
     return result;
 }
 
-std::string gpt_sampler_prev_str(gpt_sampler * gsmpl, llama_context * ctx_main, int n) {
+std::string common_sampler_prev_str(common_sampler * gsmpl, llama_context * ctx_main, int n) {
     n = std::min(n, (int) gsmpl->prev.size());
 
     if (n <= 0) {
@@ -361,68 +350,69 @@ std::string gpt_sampler_prev_str(gpt_sampler * gsmpl, llama_context * ctx_main,
 
         LM_GGML_ASSERT(id != LLAMA_TOKEN_NULL && "null token in the sampling history - should not happen");
 
-        result += llama_token_to_piece(ctx_main, id);
+        result += common_token_to_piece(ctx_main, id);
     }
 
     return result;
 }
 
-char gpt_sampler_type_to_chr(enum gpt_sampler_type cnstr) {
+char common_sampler_type_to_chr(enum common_sampler_type cnstr) {
     switch (cnstr) {
-        case GPT_SAMPLER_TYPE_TOP_K: return 'k';
-        case GPT_SAMPLER_TYPE_TFS_Z: return 'f';
-        case GPT_SAMPLER_TYPE_TYPICAL_P: return 'y';
-        case GPT_SAMPLER_TYPE_TOP_P: return 'p';
-        case GPT_SAMPLER_TYPE_MIN_P: return 'm';
-        case GPT_SAMPLER_TYPE_TEMPERATURE: return 't';
-        case GPT_SAMPLER_TYPE_XTC: return 'x';
+        case COMMON_SAMPLER_TYPE_TOP_K: return 'k';
+        case COMMON_SAMPLER_TYPE_TFS_Z: return 'f';
+        case COMMON_SAMPLER_TYPE_TYPICAL_P: return 'y';
+        case COMMON_SAMPLER_TYPE_TOP_P: return 'p';
+        case COMMON_SAMPLER_TYPE_MIN_P: return 'm';
+        case COMMON_SAMPLER_TYPE_TEMPERATURE: return 't';
+        case COMMON_SAMPLER_TYPE_XTC: return 'x';
+        case COMMON_SAMPLER_TYPE_INFILL: return 'i';
         default : return '?';
     }
 }
 
-std::string gpt_sampler_type_to_str(enum gpt_sampler_type cnstr) {
+std::string common_sampler_type_to_str(enum common_sampler_type cnstr) {
     switch (cnstr) {
-        case GPT_SAMPLER_TYPE_TOP_K: return "top_k";
-        case GPT_SAMPLER_TYPE_TFS_Z: return "tfs_z";
-        case GPT_SAMPLER_TYPE_TYPICAL_P: return "typ_p";
-        case GPT_SAMPLER_TYPE_TOP_P: return "top_p";
-        case GPT_SAMPLER_TYPE_MIN_P: return "min_p";
-        case GPT_SAMPLER_TYPE_XTC: return "xtc";
-        case GPT_SAMPLER_TYPE_TEMPERATURE: return "temperature";
+        case COMMON_SAMPLER_TYPE_TOP_K: return "top_k";
+        case COMMON_SAMPLER_TYPE_TFS_Z: return "tfs_z";
+        case COMMON_SAMPLER_TYPE_TYPICAL_P: return "typ_p";
+        case COMMON_SAMPLER_TYPE_TOP_P: return "top_p";
+        case COMMON_SAMPLER_TYPE_MIN_P: return "min_p";
+        case COMMON_SAMPLER_TYPE_TEMPERATURE: return "temperature";
+        case COMMON_SAMPLER_TYPE_XTC: return "xtc";
+        case COMMON_SAMPLER_TYPE_INFILL: return "infill";
         default : return "";
     }
 }
 
-std::vector<gpt_sampler_type> gpt_sampler_types_from_names(const std::vector<std::string> & names, bool allow_alt_names) {
-    std::unordered_map<std::string, gpt_sampler_type> sampler_canonical_name_map {
-        { "top_k", GPT_SAMPLER_TYPE_TOP_K },
-        { "top_p", GPT_SAMPLER_TYPE_TOP_P },
-        { "typ_p", GPT_SAMPLER_TYPE_TYPICAL_P },
-        { "min_p", GPT_SAMPLER_TYPE_MIN_P },
-        { "tfs_z", GPT_SAMPLER_TYPE_TFS_Z },
-        { "xtc", GPT_SAMPLER_TYPE_XTC },
-        { "temperature", GPT_SAMPLER_TYPE_TEMPERATURE },
+std::vector<common_sampler_type> common_sampler_types_from_names(const std::vector<std::string> & names, bool allow_alt_names) {
+    std::unordered_map<std::string, common_sampler_type> sampler_canonical_name_map {
+        { "top_k", COMMON_SAMPLER_TYPE_TOP_K },
+        { "top_p", COMMON_SAMPLER_TYPE_TOP_P },
+        { "typ_p", COMMON_SAMPLER_TYPE_TYPICAL_P },
+        { "min_p", COMMON_SAMPLER_TYPE_MIN_P },
+        { "tfs_z", COMMON_SAMPLER_TYPE_TFS_Z },
+        { "temperature", COMMON_SAMPLER_TYPE_TEMPERATURE },
+        { "xtc", COMMON_SAMPLER_TYPE_XTC },
+        { "infill", COMMON_SAMPLER_TYPE_INFILL },
    };
 
     // since samplers names are written multiple ways
     // make it ready for both system names and input names
-    std::unordered_map<std::string, gpt_sampler_type> sampler_alt_name_map {
-        { "top-k", GPT_SAMPLER_TYPE_TOP_K },
-        { "top-p", GPT_SAMPLER_TYPE_TOP_P },
-        { "nucleus", GPT_SAMPLER_TYPE_TOP_P },
-        { "typical-p", GPT_SAMPLER_TYPE_TYPICAL_P },
-        { "typical", GPT_SAMPLER_TYPE_TYPICAL_P },
-        { "typ-p", GPT_SAMPLER_TYPE_TYPICAL_P },
-        { "typ", GPT_SAMPLER_TYPE_TYPICAL_P },
-        { "min-p", GPT_SAMPLER_TYPE_MIN_P },
-        { "tfs-z", GPT_SAMPLER_TYPE_TFS_Z },
-        { "tfs", GPT_SAMPLER_TYPE_TFS_Z },
-        { "xtc_p", GPT_SAMPLER_TYPE_XTC },
-        { "xtc_t", GPT_SAMPLER_TYPE_XTC },
-        { "temp", GPT_SAMPLER_TYPE_TEMPERATURE },
+    std::unordered_map<std::string, common_sampler_type> sampler_alt_name_map {
+        { "top-k", COMMON_SAMPLER_TYPE_TOP_K },
+        { "top-p", COMMON_SAMPLER_TYPE_TOP_P },
+        { "nucleus", COMMON_SAMPLER_TYPE_TOP_P },
+        { "typical-p", COMMON_SAMPLER_TYPE_TYPICAL_P },
+        { "typical", COMMON_SAMPLER_TYPE_TYPICAL_P },
+        { "typ-p", COMMON_SAMPLER_TYPE_TYPICAL_P },
+        { "typ", COMMON_SAMPLER_TYPE_TYPICAL_P },
+        { "min-p", COMMON_SAMPLER_TYPE_MIN_P },
+        { "tfs-z", COMMON_SAMPLER_TYPE_TFS_Z },
+        { "tfs", COMMON_SAMPLER_TYPE_TFS_Z },
+        { "temp", COMMON_SAMPLER_TYPE_TEMPERATURE },
     };
 
-    std::vector<gpt_sampler_type> samplers;
+    std::vector<common_sampler_type> samplers;
     samplers.reserve(names.size());
 
     for (const auto & name : names) {
@@ -442,18 +432,19 @@ std::vector<gpt_sampler_type> gpt_sampler_types_from_names(const std::vector<std
     return samplers;
 }
 
-std::vector<gpt_sampler_type> gpt_sampler_types_from_chars(const std::string & chars) {
-    std::unordered_map<char, gpt_sampler_type> sampler_name_map = {
-        { gpt_sampler_type_to_chr(GPT_SAMPLER_TYPE_TOP_K), GPT_SAMPLER_TYPE_TOP_K },
-        { gpt_sampler_type_to_chr(GPT_SAMPLER_TYPE_TFS_Z), GPT_SAMPLER_TYPE_TFS_Z },
-        { gpt_sampler_type_to_chr(GPT_SAMPLER_TYPE_TYPICAL_P), GPT_SAMPLER_TYPE_TYPICAL_P },
-        { gpt_sampler_type_to_chr(GPT_SAMPLER_TYPE_TOP_P), GPT_SAMPLER_TYPE_TOP_P },
-        { gpt_sampler_type_to_chr(GPT_SAMPLER_TYPE_MIN_P), GPT_SAMPLER_TYPE_MIN_P },
-        { gpt_sampler_type_to_chr(GPT_SAMPLER_TYPE_XTC), GPT_SAMPLER_TYPE_XTC },
-        { gpt_sampler_type_to_chr(GPT_SAMPLER_TYPE_TEMPERATURE), GPT_SAMPLER_TYPE_TEMPERATURE }
+std::vector<common_sampler_type> common_sampler_types_from_chars(const std::string & chars) {
+    std::unordered_map<char, common_sampler_type> sampler_name_map = {
+        { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TOP_K), COMMON_SAMPLER_TYPE_TOP_K },
+        { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TFS_Z), COMMON_SAMPLER_TYPE_TFS_Z },
+        { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TYPICAL_P), COMMON_SAMPLER_TYPE_TYPICAL_P },
+        { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TOP_P), COMMON_SAMPLER_TYPE_TOP_P },
+        { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_MIN_P), COMMON_SAMPLER_TYPE_MIN_P },
+        { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TEMPERATURE), COMMON_SAMPLER_TYPE_TEMPERATURE },
+        { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_XTC), COMMON_SAMPLER_TYPE_XTC },
+        { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_INFILL), COMMON_SAMPLER_TYPE_INFILL },
     };
 
-    std::vector<gpt_sampler_type> samplers;
+    std::vector<common_sampler_type> samplers;
     samplers.reserve(chars.size());
 
     for (const auto & c : chars) {
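
The sampling.cpp changes above track upstream llama.cpp's rename of the gpt_* sampling API to common_*, rename the XTC parameters (xtc_p/xtc_t become xtc_probability/xtc_threshold), add the new infill sampler to the chain builder, and remove the separate greedy path (the temp > 0.0f branch with its top-k/softmax probability shortcut, and the trailing softmax before dist). A minimal usage sketch of the renamed API follows; it assumes an already-loaded llama_model and llama_context, and the parameter values are illustrative, not defaults taken from this package:

// sketch only: uses the common_* API as declared in package/cpp/sampling.h below;
// model/context setup, decoding, and error handling are elided
#include "sampling.h"

static llama_token sample_next_token(llama_model * model, llama_context * ctx) {
    common_sampler_params params;            // field names as they appear in this diff
    params.top_k           = 40;
    params.min_p           = 0.05f;
    params.xtc_probability = 0.5f;           // was params.xtc_p in 1.2.3
    params.xtc_threshold   = 0.1f;           // was params.xtc_t in 1.2.3

    struct common_sampler * smpl = common_sampler_init(model, params);

    // sample from the logits at index -1 (the last decoded token), then record
    // the choice in the sampler history and, when a grammar is set, the grammar
    const llama_token id = common_sampler_sample(smpl, ctx, -1);
    common_sampler_accept(smpl, id, /* accept_grammar = */ true);

    common_sampler_free(smpl);
    return id;
}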
package/cpp/sampling.h CHANGED
@@ -7,7 +7,7 @@
 #include <string>
 #include <vector>
 
-// gpt_sampler extends llama_sampler with additional functionality:
+// common_sampler extends llama_sampler with additional functionality:
 //
 // - grammar support
 // - custom sampler logic based on the parameters
@@ -23,30 +23,30 @@
 // token in order to verify if it fits the grammar. And only if the token doesn't fit the grammar, the
 // grammar constraints are applied to the full vocabulary and the token is resampled.
 //
-// The gpt_sampler also maintains a container with the last accepted tokens. In the future, this can
+// The common_sampler also maintains a container with the last accepted tokens. In the future, this can
 // be moved into the core llama library.
 //
-// For convenience, the gpt_sampler also maintains a container with the current candidate tokens.
+// For convenience, the common_sampler also maintains a container with the current candidate tokens.
 // This can be used to access the probabilities of the rest of the non-sampled tokens.
 //
 // TODO: measure grammar performance
 //
 
-struct gpt_sampler;
+struct common_sampler;
 
 // llama_sampler API overloads
 
-struct gpt_sampler * gpt_sampler_init(const struct llama_model * model, const struct gpt_sampler_params & params);
+struct common_sampler * common_sampler_init(const struct llama_model * model, const struct common_sampler_params & params);
 
-void gpt_sampler_free(struct gpt_sampler * gsmpl);
+void common_sampler_free(struct common_sampler * gsmpl);
 
 // if accept_grammar is true, the token is accepted both by the sampling chain and the grammar
-void gpt_sampler_accept(struct gpt_sampler * gsmpl, llama_token token, bool accept_grammar);
-void gpt_sampler_reset (struct gpt_sampler * gsmpl);
-struct gpt_sampler * gpt_sampler_clone (struct gpt_sampler * gsmpl);
+void common_sampler_accept(struct common_sampler * gsmpl, llama_token token, bool accept_grammar);
+void common_sampler_reset (struct common_sampler * gsmpl);
+struct common_sampler * common_sampler_clone (struct common_sampler * gsmpl);
 
 // arguments can be nullptr to skip printing
-void gpt_perf_print(const struct llama_context * ctx, const struct gpt_sampler * gsmpl);
+void common_perf_print(const struct llama_context * ctx, const struct common_sampler * gsmpl);
 
 // extended sampling implementation:
 //
@@ -58,26 +58,26 @@ void gpt_perf_print(const struct llama_context * ctx, const struct gpt_sampler *
 // if grammar_first is true, the grammar is applied before the samplers (slower)
 // useful in cases where all the resulting candidates (not just the sampled one) must fit the grammar
 //
-llama_token gpt_sampler_sample(struct gpt_sampler * gsmpl, struct llama_context * ctx, int idx, bool grammar_first = false);
+llama_token common_sampler_sample(struct common_sampler * gsmpl, struct llama_context * ctx, int idx, bool grammar_first = false);
 
-uint32_t gpt_sampler_get_seed(const struct gpt_sampler * gsmpl);
+uint32_t common_sampler_get_seed(const struct common_sampler * gsmpl);
 
 // helpers
 
 // access the internal list of current candidate tokens
-llama_token_data_array * gpt_sampler_get_candidates(struct gpt_sampler * gsmpl);
+llama_token_data_array * common_sampler_get_candidates(struct common_sampler * gsmpl);
 
 // get the last accepted token
-llama_token gpt_sampler_last(const struct gpt_sampler * gsmpl);
+llama_token common_sampler_last(const struct common_sampler * gsmpl);
 
 // print the sampler chain into a string
-std::string gpt_sampler_print(const struct gpt_sampler * gsmpl);
+std::string common_sampler_print(const struct common_sampler * gsmpl);
 
 // get a string representation of the last accepted tokens
-std::string gpt_sampler_prev_str(gpt_sampler * gsmpl, llama_context * ctx, int n);
+std::string common_sampler_prev_str(common_sampler * gsmpl, llama_context * ctx, int n);
 
-char gpt_sampler_type_to_chr(enum gpt_sampler_type cnstr);
-std::string gpt_sampler_type_to_str(enum gpt_sampler_type cnstr);
+char common_sampler_type_to_chr(enum common_sampler_type cnstr);
+std::string common_sampler_type_to_str(enum common_sampler_type cnstr);
 
-std::vector<enum gpt_sampler_type> gpt_sampler_types_from_names(const std::vector<std::string> & names, bool allow_alt_names);
-std::vector<enum gpt_sampler_type> gpt_sampler_types_from_chars(const std::string & chars);
+std::vector<enum common_sampler_type> common_sampler_types_from_names(const std::vector<std::string> & names, bool allow_alt_names);
+std::vector<enum common_sampler_type> common_sampler_types_from_chars(const std::string & chars);
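
The header keeps the same helper surface under the new names, including the name and character lookup tables, which now also map "infill" and 'i'. A short sketch of resolving a user-supplied sampler order, assuming only the declarations above (the name list itself is illustrative):

#include "sampling.h"
#include <string>
#include <vector>

// resolve user-facing sampler names to enum values; with allow_alt_names = true
// the alternate spellings from sampler_alt_name_map (e.g. "nucleus" for top_p)
// are accepted as well
std::vector<enum common_sampler_type> order =
    common_sampler_types_from_names({ "top_k", "nucleus", "min_p", "xtc", "infill" }, true);

// the compact single-character form uses the codes from common_sampler_type_to_chr:
// 'k' top_k, 'f' tfs_z, 'y' typ_p, 'p' top_p, 'm' min_p, 't' temperature,
// 'x' xtc, 'i' infill (new in this version)
std::vector<enum common_sampler_type> order_chr = common_sampler_types_from_chars("kypmxt");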
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "cui-llama.rn",
-  "version": "1.2.3",
+  "version": "1.2.4",
   "description": "Fork of llama.rn for ChatterUI",
   "main": "lib/commonjs/index",
   "module": "lib/module/index",