cui-llama.rn 1.2.2 → 1.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/java/com/rnllama/LlamaContext.java +5 -2
- package/android/src/main/jni.cpp +7 -7
- package/cpp/common.cpp +81 -63
- package/cpp/common.h +79 -62
- package/cpp/ggml-alloc.c +17 -19
- package/cpp/ggml-backend.cpp +59 -24
- package/cpp/ggml-impl.h +8 -0
- package/cpp/ggml.c +65 -23
- package/cpp/ggml.h +1 -0
- package/cpp/json-schema-to-grammar.cpp +1 -1
- package/cpp/llama-sampling.cpp +366 -24
- package/cpp/llama-sampling.h +3 -2
- package/cpp/llama-vocab.cpp +33 -9
- package/cpp/llama-vocab.h +30 -11
- package/cpp/llama.cpp +471 -387
- package/cpp/llama.h +52 -21
- package/cpp/log.cpp +50 -50
- package/cpp/log.h +18 -18
- package/cpp/rn-llama.hpp +23 -22
- package/cpp/sampling.cpp +110 -119
- package/cpp/sampling.h +20 -20
- package/package.json +1 -1
package/cpp/sampling.cpp
CHANGED
@@ -98,8 +98,8 @@ struct ring_buffer {
     std::vector<T> data;
 };
 
-struct gpt_sampler {
-    gpt_sampler_params params;
+struct common_sampler {
+    common_sampler_params params;
 
     struct llama_sampler * grmr;
     struct llama_sampler * chain;
@@ -125,26 +125,26 @@ struct gpt_sampler {
     }
 };
 
-std::string gpt_sampler_params::print() const {
+std::string common_sampler_params::print() const {
     char result[1024];
 
     snprintf(result, sizeof(result),
             "\trepeat_last_n = %d, repeat_penalty = %.3f, frequency_penalty = %.3f, presence_penalty = %.3f\n"
-            "\ttop_k = %d, tfs_z = %.3f, top_p = %.3f, min_p = %.3f, typical_p = %.3f, temp = %.3f\n"
+            "\ttop_k = %d, tfs_z = %.3f, top_p = %.3f, min_p = %.3f, xtc_probability = %.3f, xtc_threshold = %.3f, typical_p = %.3f, temp = %.3f\n"
             "\tmirostat = %d, mirostat_lr = %.3f, mirostat_ent = %.3f",
             penalty_last_n, penalty_repeat, penalty_freq, penalty_present,
-            top_k, tfs_z, top_p, min_p, typ_p, temp,
+            top_k, tfs_z, top_p, min_p, xtc_probability, xtc_threshold, typ_p, temp,
             mirostat, mirostat_eta, mirostat_tau);
 
     return std::string(result);
 }
 
-struct gpt_sampler * gpt_sampler_init(const struct llama_model * model, const struct gpt_sampler_params & params) {
+struct common_sampler * common_sampler_init(const struct llama_model * model, const struct common_sampler_params & params) {
     llama_sampler_chain_params lparams = llama_sampler_chain_default_params();
 
     lparams.no_perf = params.no_perf;
 
-    auto * result = new gpt_sampler {
+    auto * result = new common_sampler {
         /* .params = */ params,
         /* .grmr   = */ llama_sampler_init_grammar(model, params.grammar.c_str(), "root"),
         /* .chain  = */ llama_sampler_chain_init(lparams),
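For context, a minimal sketch (not code from the package; the helper name and values are invented) of setting the two new XTC fields that print() now reports:

    #include <cstdio>
    #include "sampling.h"

    // hypothetical helper: only the field names come from the hunk above, the values are arbitrary
    static common_sampler_params make_xtc_params() {
        common_sampler_params sparams;
        sparams.temp            = 0.8f;
        sparams.xtc_probability = 0.5f; // chance that the XTC cut is applied for a given sample
        sparams.xtc_threshold   = 0.1f; // tokens above this probability become removal candidates
        std::printf("%s\n", sparams.print().c_str()); // now includes xtc_probability / xtc_threshold
        return sparams;
    }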
@@ -170,64 +170,53 @@ struct gpt_sampler * gpt_sampler_init(const struct llama_model * model, const st
                 params.penalty_present,
                 params.penalize_nl,
                 params.ignore_eos));
-
-    if (params.…
-    … [old lines 175-201 were truncated by the diff renderer]
+
+    if (params.mirostat == 0) {
+        for (const auto & cnstr : params.samplers) {
+            switch (cnstr) {
+                case COMMON_SAMPLER_TYPE_TOP_K:
+                    llama_sampler_chain_add(result->chain, llama_sampler_init_top_k    (params.top_k));
+                    break;
+                case COMMON_SAMPLER_TYPE_TOP_P:
+                    llama_sampler_chain_add(result->chain, llama_sampler_init_top_p    (params.top_p, params.min_keep));
+                    break;
+                case COMMON_SAMPLER_TYPE_MIN_P:
+                    llama_sampler_chain_add(result->chain, llama_sampler_init_min_p    (params.min_p, params.min_keep));
+                    break;
+                case COMMON_SAMPLER_TYPE_XTC:
+                    llama_sampler_chain_add(result->chain, llama_sampler_init_xtc      (params.xtc_probability, params.xtc_threshold, params.min_keep, params.seed));
+                    break;
+                case COMMON_SAMPLER_TYPE_TFS_Z:
+                    llama_sampler_chain_add(result->chain, llama_sampler_init_tail_free(params.tfs_z, params.min_keep));
+                    break;
+                case COMMON_SAMPLER_TYPE_TYPICAL_P:
+                    llama_sampler_chain_add(result->chain, llama_sampler_init_typical  (params.typ_p, params.min_keep));
+                    break;
+                case COMMON_SAMPLER_TYPE_TEMPERATURE:
+                    llama_sampler_chain_add(result->chain, llama_sampler_init_temp_ext (params.temp, params.dynatemp_range, params.dynatemp_exponent));
+                    break;
+                case COMMON_SAMPLER_TYPE_INFILL:
+                    llama_sampler_chain_add(result->chain, llama_sampler_init_infill   (model));
+                    break;
+                default:
+                    LM_GGML_ASSERT(false && "unknown sampler type");
             }
-            llama_sampler_chain_add(result->chain, llama_sampler_init_softmax());
-            llama_sampler_chain_add(result->chain, llama_sampler_init_dist(params.seed));
-        } else if (params.mirostat == 1) {
-            llama_sampler_chain_add(result->chain, llama_sampler_init_temp(params.temp));
-            llama_sampler_chain_add(result->chain, llama_sampler_init_mirostat(llama_n_vocab(model), params.seed, params.mirostat_tau, params.mirostat_eta, 100));
-        } else if (params.mirostat == 2) {
-            llama_sampler_chain_add(result->chain, llama_sampler_init_temp(params.temp));
-            llama_sampler_chain_add(result->chain, llama_sampler_init_mirostat_v2(params.seed, params.mirostat_tau, params.mirostat_eta));
-        } else {
-            LM_GGML_ASSERT(false && "unknown mirostat version");
         }
+        llama_sampler_chain_add(result->chain, llama_sampler_init_dist(params.seed));
+    } else if (params.mirostat == 1) {
+        llama_sampler_chain_add(result->chain, llama_sampler_init_temp(params.temp));
+        llama_sampler_chain_add(result->chain, llama_sampler_init_mirostat(llama_n_vocab(model), params.seed, params.mirostat_tau, params.mirostat_eta, 100));
+    } else if (params.mirostat == 2) {
+        llama_sampler_chain_add(result->chain, llama_sampler_init_temp(params.temp));
+        llama_sampler_chain_add(result->chain, llama_sampler_init_mirostat_v2(params.seed, params.mirostat_tau, params.mirostat_eta));
     } else {
-        if (params.n_probs > 0) {
-            // some use cases require to sample greedily, but still obtain the probabilities of the top tokens
-            // ref: https://github.com/ggerganov/llama.cpp/pull/9605
-            //
-            // the following will not produce exactly the same probs as applyging softmax to the full vocabulary, but
-            // it is much faster, since we avoid sorting all tokens and should give a good approximation
-            llama_sampler_chain_add(result->chain, llama_sampler_init_top_k(params.n_probs));
-            llama_sampler_chain_add(result->chain, llama_sampler_init_softmax());
-        }
-        llama_sampler_chain_add(result->chain, llama_sampler_init_greedy());
+        LM_GGML_ASSERT(false && "unknown mirostat version");
     }
 
     return result;
 }
 
-void gpt_sampler_free(struct gpt_sampler * gsmpl) {
+void common_sampler_free(struct common_sampler * gsmpl) {
     if (gsmpl) {
         llama_sampler_free(gsmpl->grmr);
 
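The rewritten init drops the old greedy/temperature split: for mirostat == 0 the chain is now built from params.samplers in order and always ends in a seeded dist sampler, while the softmax + greedy tail shown deleted above is gone. A rough equivalent using the public llama.h chain API directly (a sketch with illustrative values, not code from the package):

    #include "llama.h"

    // build a top_k -> min_p -> temperature -> dist chain by hand, mirroring
    // what common_sampler_init assembles for mirostat == 0; values are arbitrary
    static struct llama_sampler * build_chain_sketch(void) {
        struct llama_sampler * chain = llama_sampler_chain_init(llama_sampler_chain_default_params());

        llama_sampler_chain_add(chain, llama_sampler_init_top_k(40));
        llama_sampler_chain_add(chain, llama_sampler_init_min_p(0.05f, 1));
        llama_sampler_chain_add(chain, llama_sampler_init_temp(0.8f));
        llama_sampler_chain_add(chain, llama_sampler_init_dist(LLAMA_DEFAULT_SEED)); // final pick

        return chain; // release with llama_sampler_free(chain)
    }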
@@ -237,7 +226,7 @@ void gpt_sampler_free(struct gpt_sampler * gsmpl) {
     }
 }
 
-void gpt_sampler_accept(struct gpt_sampler * gsmpl, llama_token token, bool accept_grammar) {
+void common_sampler_accept(struct common_sampler * gsmpl, llama_token token, bool accept_grammar) {
     if (accept_grammar) {
         llama_sampler_accept(gsmpl->grmr, token);
     }
@@ -247,14 +236,14 @@ void gpt_sampler_accept(struct gpt_sampler * gsmpl, llama_token token, bool acce
     gsmpl->prev.push_back(token);
 }
 
-void gpt_sampler_reset(struct gpt_sampler * gsmpl) {
+void common_sampler_reset(struct common_sampler * gsmpl) {
     llama_sampler_reset(gsmpl->grmr);
 
     llama_sampler_reset(gsmpl->chain);
 }
 
-struct gpt_sampler * gpt_sampler_clone(gpt_sampler * gsmpl) {
-    return new gpt_sampler {
+struct common_sampler * common_sampler_clone(common_sampler * gsmpl) {
+    return new common_sampler {
         /* .params = */ gsmpl->params,
         /* .grmr   = */ llama_sampler_clone(gsmpl->grmr),
         /* .chain  = */ llama_sampler_clone(gsmpl->chain),
@@ -264,7 +253,7 @@ struct gpt_sampler * gpt_sampler_clone(gpt_sampler * gsmpl) {
     };
 }
 
-void gpt_perf_print(const struct llama_context * ctx, const struct gpt_sampler * gsmpl) {
+void common_perf_print(const struct llama_context * ctx, const struct common_sampler * gsmpl) {
     // TODO: measure grammar performance
 
     if (gsmpl) {
@@ -275,7 +264,7 @@ void gpt_perf_print(const struct llama_context * ctx, const struct gpt_sampler *
     }
 }
 
-llama_token gpt_sampler_sample(struct gpt_sampler * gsmpl, struct llama_context * ctx, int idx, bool grammar_first) {
+llama_token common_sampler_sample(struct common_sampler * gsmpl, struct llama_context * ctx, int idx, bool grammar_first) {
     gsmpl->set_logits(ctx, idx);
 
     auto & grmr = gsmpl->grmr;
@@ -321,21 +310,21 @@ llama_token gpt_sampler_sample(struct gpt_sampler * gsmpl, struct llama_context
     return cur_p.data[cur_p.selected].id;
 }
 
-uint32_t gpt_sampler_get_seed(const struct gpt_sampler * gsmpl) {
+uint32_t common_sampler_get_seed(const struct common_sampler * gsmpl) {
     return llama_sampler_get_seed(gsmpl->chain);
 }
 
 // helpers
 
-llama_token_data_array * gpt_sampler_get_candidates(struct gpt_sampler * gsmpl) {
+llama_token_data_array * common_sampler_get_candidates(struct common_sampler * gsmpl) {
     return &gsmpl->cur_p;
 }
 
-llama_token gpt_sampler_last(const struct gpt_sampler * gsmpl) {
+llama_token common_sampler_last(const struct common_sampler * gsmpl) {
     return gsmpl->prev.rat(0);
 }
 
-std::string gpt_sampler_print(const struct gpt_sampler * gsmpl) {
+std::string common_sampler_print(const struct common_sampler * gsmpl) {
     std::string result = "logits ";
 
     for (int i = 0; i < llama_sampler_chain_n(gsmpl->chain); i++) {
|
|
346
335
|
return result;
|
347
336
|
}
|
348
337
|
|
349
|
-
std::string
|
338
|
+
std::string common_sampler_prev_str(common_sampler * gsmpl, llama_context * ctx_main, int n) {
|
350
339
|
n = std::min(n, (int) gsmpl->prev.size());
|
351
340
|
|
352
341
|
if (n <= 0) {
|
@@ -361,68 +350,69 @@ std::string gpt_sampler_prev_str(gpt_sampler * gsmpl, llama_context * ctx_main,
|
|
361
350
|
|
362
351
|
LM_GGML_ASSERT(id != LLAMA_TOKEN_NULL && "null token in the sampling history - should not happen");
|
363
352
|
|
364
|
-
result +=
|
353
|
+
result += common_token_to_piece(ctx_main, id);
|
365
354
|
}
|
366
355
|
|
367
356
|
return result;
|
368
357
|
}
|
369
358
|
|
370
|
-
char
|
359
|
+
char common_sampler_type_to_chr(enum common_sampler_type cnstr) {
|
371
360
|
switch (cnstr) {
|
372
|
-
case
|
373
|
-
case
|
374
|
-
case
|
375
|
-
case
|
376
|
-
case
|
377
|
-
case
|
378
|
-
case
|
361
|
+
case COMMON_SAMPLER_TYPE_TOP_K: return 'k';
|
362
|
+
case COMMON_SAMPLER_TYPE_TFS_Z: return 'f';
|
363
|
+
case COMMON_SAMPLER_TYPE_TYPICAL_P: return 'y';
|
364
|
+
case COMMON_SAMPLER_TYPE_TOP_P: return 'p';
|
365
|
+
case COMMON_SAMPLER_TYPE_MIN_P: return 'm';
|
366
|
+
case COMMON_SAMPLER_TYPE_TEMPERATURE: return 't';
|
367
|
+
case COMMON_SAMPLER_TYPE_XTC: return 'x';
|
368
|
+
case COMMON_SAMPLER_TYPE_INFILL: return 'i';
|
379
369
|
default : return '?';
|
380
370
|
}
|
381
371
|
}
|
382
372
|
|
383
|
-
std::string
|
373
|
+
std::string common_sampler_type_to_str(enum common_sampler_type cnstr) {
|
384
374
|
switch (cnstr) {
|
385
|
-
case
|
386
|
-
case
|
387
|
-
case
|
388
|
-
case
|
389
|
-
case
|
390
|
-
case
|
391
|
-
case
|
375
|
+
case COMMON_SAMPLER_TYPE_TOP_K: return "top_k";
|
376
|
+
case COMMON_SAMPLER_TYPE_TFS_Z: return "tfs_z";
|
377
|
+
case COMMON_SAMPLER_TYPE_TYPICAL_P: return "typ_p";
|
378
|
+
case COMMON_SAMPLER_TYPE_TOP_P: return "top_p";
|
379
|
+
case COMMON_SAMPLER_TYPE_MIN_P: return "min_p";
|
380
|
+
case COMMON_SAMPLER_TYPE_TEMPERATURE: return "temperature";
|
381
|
+
case COMMON_SAMPLER_TYPE_XTC: return "xtc";
|
382
|
+
case COMMON_SAMPLER_TYPE_INFILL: return "infill";
|
392
383
|
default : return "";
|
393
384
|
}
|
394
385
|
}
|
395
386
|
|
396
|
-
std::vector<
|
397
|
-
std::unordered_map<std::string,
|
398
|
-
{ "top_k",
|
399
|
-
{ "top_p",
|
400
|
-
{ "typ_p",
|
401
|
-
{ "min_p",
|
402
|
-
{ "tfs_z",
|
403
|
-
{ "
|
404
|
-
{ "
|
387
|
+
std::vector<common_sampler_type> common_sampler_types_from_names(const std::vector<std::string> & names, bool allow_alt_names) {
|
388
|
+
std::unordered_map<std::string, common_sampler_type> sampler_canonical_name_map {
|
389
|
+
{ "top_k", COMMON_SAMPLER_TYPE_TOP_K },
|
390
|
+
{ "top_p", COMMON_SAMPLER_TYPE_TOP_P },
|
391
|
+
{ "typ_p", COMMON_SAMPLER_TYPE_TYPICAL_P },
|
392
|
+
{ "min_p", COMMON_SAMPLER_TYPE_MIN_P },
|
393
|
+
{ "tfs_z", COMMON_SAMPLER_TYPE_TFS_Z },
|
394
|
+
{ "temperature", COMMON_SAMPLER_TYPE_TEMPERATURE },
|
395
|
+
{ "xtc", COMMON_SAMPLER_TYPE_XTC },
|
396
|
+
{ "infill", COMMON_SAMPLER_TYPE_INFILL },
|
405
397
|
};
|
406
398
|
|
407
399
|
// since samplers names are written multiple ways
|
408
400
|
// make it ready for both system names and input names
|
409
|
-
std::unordered_map<std::string,
|
410
|
-
{ "top-k",
|
411
|
-
{ "top-p",
|
412
|
-
{ "nucleus",
|
413
|
-
{ "typical-p",
|
414
|
-
{ "typical",
|
415
|
-
{ "typ-p",
|
416
|
-
{ "typ",
|
417
|
-
{ "min-p",
|
418
|
-
{ "tfs-z",
|
419
|
-
{ "tfs",
|
420
|
-
{ "
|
421
|
-
{ "xtc_t", GPT_SAMPLER_TYPE_XTC},
|
422
|
-
{ "temp", GPT_SAMPLER_TYPE_TEMPERATURE },
|
401
|
+
std::unordered_map<std::string, common_sampler_type> sampler_alt_name_map {
|
402
|
+
{ "top-k", COMMON_SAMPLER_TYPE_TOP_K },
|
403
|
+
{ "top-p", COMMON_SAMPLER_TYPE_TOP_P },
|
404
|
+
{ "nucleus", COMMON_SAMPLER_TYPE_TOP_P },
|
405
|
+
{ "typical-p", COMMON_SAMPLER_TYPE_TYPICAL_P },
|
406
|
+
{ "typical", COMMON_SAMPLER_TYPE_TYPICAL_P },
|
407
|
+
{ "typ-p", COMMON_SAMPLER_TYPE_TYPICAL_P },
|
408
|
+
{ "typ", COMMON_SAMPLER_TYPE_TYPICAL_P },
|
409
|
+
{ "min-p", COMMON_SAMPLER_TYPE_MIN_P },
|
410
|
+
{ "tfs-z", COMMON_SAMPLER_TYPE_TFS_Z },
|
411
|
+
{ "tfs", COMMON_SAMPLER_TYPE_TFS_Z },
|
412
|
+
{ "temp", COMMON_SAMPLER_TYPE_TEMPERATURE },
|
423
413
|
};
|
424
414
|
|
425
|
-
std::vector<
|
415
|
+
std::vector<common_sampler_type> samplers;
|
426
416
|
samplers.reserve(names.size());
|
427
417
|
|
428
418
|
for (const auto & name : names) {
|
@@ -442,18 +432,19 @@ std::vector<gpt_sampler_type> gpt_sampler_types_from_names(const std::vector<std
|
|
442
432
|
return samplers;
|
443
433
|
}
|
444
434
|
|
445
|
-
std::vector<
|
446
|
-
std::unordered_map<char,
|
447
|
-
{
|
448
|
-
{
|
449
|
-
{
|
450
|
-
{
|
451
|
-
{
|
452
|
-
{
|
453
|
-
{
|
435
|
+
std::vector<common_sampler_type> common_sampler_types_from_chars(const std::string & chars) {
|
436
|
+
std::unordered_map<char, common_sampler_type> sampler_name_map = {
|
437
|
+
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TOP_K), COMMON_SAMPLER_TYPE_TOP_K },
|
438
|
+
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TFS_Z), COMMON_SAMPLER_TYPE_TFS_Z },
|
439
|
+
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TYPICAL_P), COMMON_SAMPLER_TYPE_TYPICAL_P },
|
440
|
+
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TOP_P), COMMON_SAMPLER_TYPE_TOP_P },
|
441
|
+
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_MIN_P), COMMON_SAMPLER_TYPE_MIN_P },
|
442
|
+
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TEMPERATURE), COMMON_SAMPLER_TYPE_TEMPERATURE },
|
443
|
+
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_XTC), COMMON_SAMPLER_TYPE_XTC },
|
444
|
+
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_INFILL), COMMON_SAMPLER_TYPE_INFILL },
|
454
445
|
};
|
455
446
|
|
456
|
-
std::vector<
|
447
|
+
std::vector<common_sampler_type> samplers;
|
457
448
|
samplers.reserve(chars.size());
|
458
449
|
|
459
450
|
for (const auto & c : chars) {
|
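With the tables above, both lookup helpers now resolve the upstream spellings, and the fork-specific "xtc_t" alias is gone. A short sketch of how the two entry points behave (the function names and the allow_alt_names flag are taken from this file; the example values are arbitrary):

    #include "sampling.h"

    static void resolve_samplers_sketch() {
        // "typ-p" resolves through sampler_alt_name_map when allow_alt_names is true
        const std::vector<enum common_sampler_type> by_name =
            common_sampler_types_from_names({ "top_k", "typ-p", "xtc", "temperature" },
                                            /* allow_alt_names = */ true);

        // single-character form, using the letters from common_sampler_type_to_chr:
        // 'k' = top_k, 'm' = min_p, 'x' = xtc, 't' = temperature
        const std::vector<enum common_sampler_type> by_chr = common_sampler_types_from_chars("kmxt");
    }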
package/cpp/sampling.h
CHANGED
@@ -7,7 +7,7 @@
 #include <string>
 #include <vector>
 
-// gpt_sampler extends llama_sampler with additional functionality:
+// common_sampler extends llama_sampler with additional functionality:
 //
 // - grammar support
 // - custom sampler logic based on the parameters
@@ -23,30 +23,30 @@
 // token in order to verify if it fits the grammar. And only if the token doesn't fit the grammar, the
 // grammar constraints are applied to the full vocabulary and the token is resampled.
 //
-// The gpt_sampler also maintains a container with the last accepted tokens. In the future, this can
+// The common_sampler also maintains a container with the last accepted tokens. In the future, this can
 // be moved into the core llama library.
 //
-// For convenience, the gpt_sampler also maintains a container with the current candidate tokens.
+// For convenience, the common_sampler also maintains a container with the current candidate tokens.
 // This can be used to access the probabilities of the rest of the non-sampled tokens.
 //
 // TODO: measure grammar performance
 //
 
-struct gpt_sampler;
+struct common_sampler;
 
 // llama_sampler API overloads
 
-struct gpt_sampler * gpt_sampler_init(const struct llama_model * model, const struct gpt_sampler_params & params);
+struct common_sampler * common_sampler_init(const struct llama_model * model, const struct common_sampler_params & params);
 
-void gpt_sampler_free(struct gpt_sampler * gsmpl);
+void common_sampler_free(struct common_sampler * gsmpl);
 
 // if accept_grammar is true, the token is accepted both by the sampling chain and the grammar
-void                 gpt_sampler_accept(struct gpt_sampler * gsmpl, llama_token token, bool accept_grammar);
-void                 gpt_sampler_reset (struct gpt_sampler * gsmpl);
-struct gpt_sampler * gpt_sampler_clone (struct gpt_sampler * gsmpl);
+void                    common_sampler_accept(struct common_sampler * gsmpl, llama_token token, bool accept_grammar);
+void                    common_sampler_reset (struct common_sampler * gsmpl);
+struct common_sampler * common_sampler_clone (struct common_sampler * gsmpl);
 
 // arguments can be nullptr to skip printing
-void gpt_perf_print(const struct llama_context * ctx, const struct gpt_sampler * gsmpl);
+void common_perf_print(const struct llama_context * ctx, const struct common_sampler * gsmpl);
 
 // extended sampling implementation:
 //
@@ -58,26 +58,26 @@ void gpt_perf_print(const struct llama_context * ctx, const struct gpt_sampler *
 // if grammar_first is true, the grammar is applied before the samplers (slower)
 // useful in cases where all the resulting candidates (not just the sampled one) must fit the grammar
 //
-llama_token gpt_sampler_sample(struct gpt_sampler * gsmpl, struct llama_context * ctx, int idx, bool grammar_first = false);
+llama_token common_sampler_sample(struct common_sampler * gsmpl, struct llama_context * ctx, int idx, bool grammar_first = false);
 
-uint32_t gpt_sampler_get_seed(const struct gpt_sampler * gsmpl);
+uint32_t common_sampler_get_seed(const struct common_sampler * gsmpl);
 
 // helpers
 
 // access the internal list of current candidate tokens
-llama_token_data_array * gpt_sampler_get_candidates(struct gpt_sampler * gsmpl);
+llama_token_data_array * common_sampler_get_candidates(struct common_sampler * gsmpl);
 
 // get the last accepted token
-llama_token gpt_sampler_last(const struct gpt_sampler * gsmpl);
+llama_token common_sampler_last(const struct common_sampler * gsmpl);
 
 // print the sampler chain into a string
-std::string gpt_sampler_print(const struct gpt_sampler * gsmpl);
+std::string common_sampler_print(const struct common_sampler * gsmpl);
 
 // get a string representation of the last accepted tokens
-std::string gpt_sampler_prev_str(gpt_sampler * gsmpl, llama_context * ctx, int n);
+std::string common_sampler_prev_str(common_sampler * gsmpl, llama_context * ctx, int n);
 
-char        gpt_sampler_type_to_chr(enum gpt_sampler_type cnstr);
-std::string gpt_sampler_type_to_str(enum gpt_sampler_type cnstr);
+char        common_sampler_type_to_chr(enum common_sampler_type cnstr);
+std::string common_sampler_type_to_str(enum common_sampler_type cnstr);
 
-std::vector<enum gpt_sampler_type> gpt_sampler_types_from_names(const std::vector<std::string> & names, bool allow_alt_names);
-std::vector<enum gpt_sampler_type> gpt_sampler_types_from_chars(const std::string & chars);
+std::vector<enum common_sampler_type> common_sampler_types_from_names(const std::vector<std::string> & names, bool allow_alt_names);
+std::vector<enum common_sampler_type> common_sampler_types_from_chars(const std::string & chars);
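Taken together, the renamed header API is used in the same init, sample, accept, free order as before. A closing sketch (hypothetical glue code; model/context creation and the llama_decode() calls are omitted):

    #include "sampling.h"

    static std::string generate_sketch(struct llama_model * model, struct llama_context * ctx,
                                       const struct common_sampler_params & sparams, int n_predict) {
        struct common_sampler * gsmpl = common_sampler_init(model, sparams);

        std::string out;
        for (int i = 0; i < n_predict; ++i) {
            const llama_token id = common_sampler_sample(gsmpl, ctx, -1);
            common_sampler_accept(gsmpl, id, /* accept_grammar = */ true);
            out += common_token_to_piece(ctx, id); // renamed from llama_token_to_piece
            // ... llama_decode(...) with `id` would run here ...
        }

        common_sampler_free(gsmpl);
        return out;
    }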