@fugood/llama.node 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +1 -10
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/package.json +6 -4
- package/src/LlamaCompletionWorker.cpp +6 -6
- package/src/LlamaContext.cpp +7 -9
- package/src/common.hpp +2 -1
- package/src/llama.cpp/.github/workflows/build.yml +98 -24
- package/src/llama.cpp/.github/workflows/close-issue.yml +5 -0
- package/src/llama.cpp/.github/workflows/docker.yml +43 -34
- package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +7 -0
- package/src/llama.cpp/.github/workflows/nix-ci.yml +7 -0
- package/src/llama.cpp/.github/workflows/python-check-requirements.yml +2 -4
- package/src/llama.cpp/.github/workflows/python-type-check.yml +3 -1
- package/src/llama.cpp/.github/workflows/server.yml +7 -0
- package/src/llama.cpp/CMakeLists.txt +20 -8
- package/src/llama.cpp/common/CMakeLists.txt +12 -10
- package/src/llama.cpp/common/arg.cpp +2006 -0
- package/src/llama.cpp/common/arg.h +77 -0
- package/src/llama.cpp/common/common.cpp +496 -1632
- package/src/llama.cpp/common/common.h +161 -63
- package/src/llama.cpp/common/console.cpp +3 -0
- package/src/llama.cpp/common/log.cpp +401 -0
- package/src/llama.cpp/common/log.h +66 -698
- package/src/llama.cpp/common/ngram-cache.cpp +3 -0
- package/src/llama.cpp/common/sampling.cpp +348 -350
- package/src/llama.cpp/common/sampling.h +62 -139
- package/src/llama.cpp/common/stb_image.h +5990 -6398
- package/src/llama.cpp/common/train.cpp +2 -0
- package/src/llama.cpp/docs/build.md +36 -1
- package/src/llama.cpp/examples/CMakeLists.txt +0 -1
- package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +1 -2
- package/src/llama.cpp/examples/batched/batched.cpp +39 -55
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +34 -44
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +55 -52
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +15 -15
- package/src/llama.cpp/examples/cvector-generator/pca.hpp +3 -13
- package/src/llama.cpp/examples/embedding/embedding.cpp +143 -87
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +33 -33
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +36 -35
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +14 -39
- package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +83 -0
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +58 -39
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +34 -27
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +59 -62
- package/src/llama.cpp/examples/infill/infill.cpp +117 -132
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +265 -58
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +29 -22
- package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
- package/src/llama.cpp/examples/llava/clip.cpp +685 -150
- package/src/llama.cpp/examples/llava/clip.h +11 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +47 -58
- package/src/llama.cpp/examples/llava/llava.cpp +110 -24
- package/src/llama.cpp/examples/llava/llava.h +2 -3
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +323 -0
- package/src/llama.cpp/examples/llava/requirements.txt +1 -0
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +42 -43
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +10 -8
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +23 -22
- package/src/llama.cpp/examples/lookup/lookup.cpp +40 -43
- package/src/llama.cpp/examples/main/main.cpp +210 -262
- package/src/llama.cpp/examples/parallel/parallel.cpp +49 -49
- package/src/llama.cpp/examples/passkey/passkey.cpp +42 -50
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +187 -200
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/quantize.cpp +27 -9
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +2 -3
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +49 -44
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +24 -1
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +32 -35
- package/src/llama.cpp/examples/server/CMakeLists.txt +3 -5
- package/src/llama.cpp/examples/server/server.cpp +1027 -1073
- package/src/llama.cpp/examples/server/tests/requirements.txt +2 -1
- package/src/llama.cpp/examples/server/utils.hpp +107 -105
- package/src/llama.cpp/examples/simple/simple.cpp +35 -41
- package/src/llama.cpp/examples/speculative/speculative.cpp +129 -103
- package/src/llama.cpp/examples/sycl/run-llama2.sh +10 -19
- package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +25 -27
- package/src/llama.cpp/ggml/CMakeLists.txt +14 -3
- package/src/llama.cpp/ggml/include/ggml-alloc.h +3 -3
- package/src/llama.cpp/ggml/include/ggml-backend.h +145 -60
- package/src/llama.cpp/ggml/include/ggml-blas.h +3 -3
- package/src/llama.cpp/ggml/include/ggml-cann.h +15 -19
- package/src/llama.cpp/ggml/include/ggml-cuda.h +16 -16
- package/src/llama.cpp/ggml/include/ggml-metal.h +5 -8
- package/src/llama.cpp/ggml/include/ggml-rpc.h +5 -5
- package/src/llama.cpp/ggml/include/ggml-sycl.h +8 -8
- package/src/llama.cpp/ggml/include/ggml-vulkan.h +7 -7
- package/src/llama.cpp/ggml/include/ggml.h +293 -186
- package/src/llama.cpp/ggml/src/CMakeLists.txt +86 -44
- package/src/llama.cpp/ggml/src/ggml-aarch64.c +2135 -1119
- package/src/llama.cpp/ggml/src/ggml-alloc.c +6 -0
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +152 -70
- package/src/llama.cpp/ggml/src/{ggml-backend.c → ggml-backend.cpp} +606 -286
- package/src/llama.cpp/ggml/src/ggml-blas.cpp +9 -10
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +4 -27
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +32 -4
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +179 -41
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -1
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +2 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +278 -0
- package/src/llama.cpp/ggml/src/ggml-cann.cpp +215 -216
- package/src/llama.cpp/ggml/src/ggml-common.h +20 -0
- package/src/llama.cpp/ggml/src/ggml-cpu-impl.h +614 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +178 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +134 -0
- package/src/llama.cpp/ggml/src/ggml-impl.h +49 -603
- package/src/llama.cpp/ggml/src/ggml-kompute.cpp +4 -24
- package/src/llama.cpp/ggml/src/ggml-quants.c +972 -92
- package/src/llama.cpp/ggml/src/ggml-quants.h +15 -0
- package/src/llama.cpp/ggml/src/ggml-rpc.cpp +116 -66
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +3 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +52 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +99 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +21 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +57 -57
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +106 -106
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +16 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +101 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +125 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +6 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +2 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +71 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +21 -0
- package/src/llama.cpp/ggml/src/ggml-sycl.cpp +97 -169
- package/src/llama.cpp/ggml/src/ggml-vulkan.cpp +1508 -1124
- package/src/llama.cpp/ggml/src/ggml.c +3001 -1647
- package/src/llama.cpp/ggml/src/llamafile/sgemm.cpp +192 -0
- package/src/llama.cpp/ggml/src/vulkan-shaders/CMakeLists.txt +2 -0
- package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +88 -40
- package/src/llama.cpp/include/llama.h +241 -264
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +46 -0
- package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +1 -1
- package/src/llama.cpp/src/llama-grammar.cpp +721 -122
- package/src/llama.cpp/src/llama-grammar.h +120 -15
- package/src/llama.cpp/src/llama-impl.h +156 -1
- package/src/llama.cpp/src/llama-sampling.cpp +1375 -303
- package/src/llama.cpp/src/llama-sampling.h +20 -47
- package/src/llama.cpp/src/llama-vocab.cpp +343 -120
- package/src/llama.cpp/src/llama-vocab.h +33 -17
- package/src/llama.cpp/src/llama.cpp +4247 -1525
- package/src/llama.cpp/src/unicode-data.cpp +6 -4
- package/src/llama.cpp/src/unicode-data.h +4 -4
- package/src/llama.cpp/src/unicode.cpp +15 -7
- package/src/llama.cpp/tests/CMakeLists.txt +3 -0
- package/src/llama.cpp/tests/test-arg-parser.cpp +131 -0
- package/src/llama.cpp/tests/test-backend-ops.cpp +1592 -289
- package/src/llama.cpp/tests/test-barrier.cpp +93 -0
- package/src/llama.cpp/tests/test-grad0.cpp +187 -70
- package/src/llama.cpp/tests/test-grammar-integration.cpp +23 -38
- package/src/llama.cpp/tests/test-grammar-parser.cpp +6 -4
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +6 -4
- package/src/llama.cpp/tests/test-llama-grammar.cpp +9 -8
- package/src/llama.cpp/tests/test-log.cpp +39 -0
- package/src/llama.cpp/tests/test-quantize-fns.cpp +6 -0
- package/src/llama.cpp/tests/test-rope.cpp +1 -1
- package/src/llama.cpp/tests/test-sampling.cpp +157 -98
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +55 -35
- package/patches/llama.patch +0 -22
- package/src/llama.cpp/.github/workflows/bench.yml +0 -310
- package/src/llama.cpp/common/grammar-parser.cpp +0 -536
- package/src/llama.cpp/common/grammar-parser.h +0 -29
- package/src/llama.cpp/examples/benchmark/CMakeLists.txt +0 -6
- package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -275
package/src/llama.cpp/common/sampling.h
@@ -2,159 +2,82 @@

 #include "llama.h"

-#include "
+#include "common.h"

-#include <random>
 #include <string>
-#include <unordered_map>
 #include <vector>

-//
-
-
-
-
-
-
-
-
-
-//
-
-
-
-
-
-
-
-
-
-
-
-
-
-    float penalty_repeat = 1.00f; // 1.0 = disabled
-    float penalty_freq = 0.00f; // 0.0 = disabled
-    float penalty_present = 0.00f; // 0.0 = disabled
-    int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
-    float mirostat_tau = 5.00f; // target entropy
-    float mirostat_eta = 0.10f; // learning rate
-    bool penalize_nl = false; // consider newlines as a repeatable token
-    uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampling_context
-
-    std::vector<llama_sampler_type> samplers_sequence = {
-        llama_sampler_type::TOP_K,
-        llama_sampler_type::TFS_Z,
-        llama_sampler_type::TYPICAL_P,
-        llama_sampler_type::TOP_P,
-        llama_sampler_type::MIN_P,
-        llama_sampler_type::TEMPERATURE
-    };
-
-    std::string grammar; // optional BNF-like grammar to constrain sampling
-
-    // Classifier-Free Guidance
-    // https://arxiv.org/abs/2306.17806
-    std::string cfg_negative_prompt; // string to help guidance
-    float cfg_scale = 1.f; // how strong is guidance
-
-    std::unordered_map<llama_token, float> logit_bias; // logit bias for specific tokens
-
-    std::vector<llama_token> penalty_prompt_tokens;
-    bool use_penalty_prompt_tokens = false;
-} llama_sampling_params;
-
-// general sampler context
-// TODO: move to llama.h
-struct llama_sampling_context {
-    // parameters that will be used for sampling
-    llama_sampling_params params;
-
-    // mirostat sampler state
-    float mirostat_mu;
-
-    llama_grammar * grammar;
-
-    // internal
-    grammar_parser::parse_state parsed_grammar;
-
-    // TODO: replace with ring-buffer
-    std::vector<llama_token> prev;
-    std::vector<llama_token_data> cur;
-    size_t n_valid; // Number of correct top tokens with correct probabilities.
-
-    std::mt19937 rng;
-};
+// gpt_sampler extends llama_sampler with additional functionality:
+//
+// - grammar support
+// - custom sampler logic based on the parameters
+// - history of the last accepted tokens
+// - performance metrics
+//
+// This goal is to have a common implementation of the sampling logic shared across the examples.
+// For example, depending on the temperature, the sampling chain can be very simple (greedy) or more
+// complex (top-k, top-p, etc).
+//
+// Another example is related to the grammar. In general, the grammar constraints applied on the full
+// vocabulary can be very taxing. To improve performance, the grammar can be applied only to the sampled
+// token in order to verify if it fits the grammar. And only if the token doesn't fit the grammar, the
+// grammar constraints are applied to the full vocabulary and the token is resampled.
+//
+// The gpt_sampler also maintains a container with the last accepted tokens. In the future, this can
+// be moved into the core llama library.
+//
+// For convenience, the gpt_sampler also maintains a container with the current candidate tokens.
+// This can be used to access the probabilities of the rest of the non-sampled tokens.
+//
+// TODO: measure grammar performance
+//

-
+struct gpt_sampler;

-//
-struct llama_sampling_context * llama_sampling_init(const struct llama_sampling_params & params);
+// llama_sampler API overloads

-
+struct gpt_sampler * gpt_sampler_init(const struct llama_model * model, const struct gpt_sampler_params & params);

-
-// - clear prev tokens
-// - reset grammar
-void llama_sampling_reset(llama_sampling_context * ctx);
+void gpt_sampler_free(struct gpt_sampler * gsmpl);

-//
-void
+// if accept_grammar is true, the token is accepted both by the sampling chain and the grammar
+void gpt_sampler_accept(struct gpt_sampler * gsmpl, llama_token token, bool accept_grammar);
+void gpt_sampler_reset (struct gpt_sampler * gsmpl);
+struct gpt_sampler * gpt_sampler_clone (struct gpt_sampler * gsmpl);

-//
-void
+// arguments can be nullptr to skip printing
+void gpt_perf_print(const struct llama_context * ctx, const struct gpt_sampler * gsmpl);

-//
-
+// extended sampling implementation:
+//
+// - set logits
+// - apply the configured sampler chain
+// - check if the token fits the grammar (if any)
+// - if not: resample by first applying the grammar constraints and then sampling again (slower path)
+//
+// if grammar_first is true, the grammar is applied before the samplers (slower)
+// useful in cases where all the resulting candidates (not just the sampled one) must fit the grammar
+//
+llama_token gpt_sampler_sample(struct gpt_sampler * gsmpl, struct llama_context * ctx, int idx, bool grammar_first = false);

-
-std::string llama_sampling_prev_str(llama_sampling_context * ctx_sampling, llama_context * ctx_main, int n);
+uint32_t gpt_sampler_get_seed(const struct gpt_sampler * gsmpl);

-//
-std::string llama_sampling_print(const llama_sampling_params & params);
+// helpers

-//
-
+// access the internal list of current candidate tokens
+llama_token_data_array * gpt_sampler_get_candidates(struct gpt_sampler * gsmpl);

-
+// get the last accepted token
+llama_token gpt_sampler_last(const struct gpt_sampler * gsmpl);

-
-std::
+// print the sampler chain into a string
+std::string gpt_sampler_print(const struct gpt_sampler * gsmpl);

-//
-
-
-
-
-
-
-
-//
-// optional:
-// - ctx_cfg: context to use for classifier-free guidance
-// - idx: sample from llama_get_logits_ith(ctx, idx)
-//
-// returns:
-// - token: sampled token
-// - candidates: vector of candidate tokens
-//
-llama_token llama_sampling_sample(
-        struct llama_sampling_context * ctx_sampling,
-        struct llama_context * ctx_main,
-        struct llama_context * ctx_cfg,
-        int idx = -1);
-
-// Prepares and adjusts the set of token candidates for sampling based on penalties, biases, and sampling parameters.
-llama_token_data_array llama_sampling_prepare(
-        struct llama_sampling_context * ctx_sampling,
-        struct llama_context * ctx_main,
-        struct llama_context * ctx_cfg,
-        int idx = 0,
-        bool apply_grammar = true,
-        std::vector<float> * original_logits = nullptr);
-
-void llama_sampling_accept(
-        struct llama_sampling_context * ctx_sampling,
-        struct llama_context * ctx_main,
-        llama_token id,
-        bool apply_grammar);
+// get a string representation of the last accepted tokens
+std::string gpt_sampler_prev_str(gpt_sampler * gsmpl, llama_context * ctx, int n);
+
+char gpt_sampler_type_to_chr(enum gpt_sampler_type cnstr);
+std::string gpt_sampler_type_to_str(enum gpt_sampler_type cnstr);
+
+std::vector<enum gpt_sampler_type> gpt_sampler_types_from_names(const std::vector<std::string> & names, bool allow_alt_names);
+std::vector<enum gpt_sampler_type> gpt_sampler_types_from_chars(const std::string & chars);