@fugood/llama.node 0.3.0 → 0.3.2

This diff shows the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (187)
  1. package/CMakeLists.txt +1 -10
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/package.json +6 -4
  17. package/src/LlamaCompletionWorker.cpp +6 -6
  18. package/src/LlamaContext.cpp +7 -9
  19. package/src/common.hpp +2 -1
  20. package/src/llama.cpp/.github/workflows/build.yml +98 -24
  21. package/src/llama.cpp/.github/workflows/close-issue.yml +5 -0
  22. package/src/llama.cpp/.github/workflows/docker.yml +43 -34
  23. package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +7 -0
  24. package/src/llama.cpp/.github/workflows/nix-ci.yml +7 -0
  25. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +2 -4
  26. package/src/llama.cpp/.github/workflows/python-type-check.yml +3 -1
  27. package/src/llama.cpp/.github/workflows/server.yml +7 -0
  28. package/src/llama.cpp/CMakeLists.txt +20 -8
  29. package/src/llama.cpp/common/CMakeLists.txt +12 -10
  30. package/src/llama.cpp/common/arg.cpp +2006 -0
  31. package/src/llama.cpp/common/arg.h +77 -0
  32. package/src/llama.cpp/common/common.cpp +496 -1632
  33. package/src/llama.cpp/common/common.h +161 -63
  34. package/src/llama.cpp/common/console.cpp +3 -0
  35. package/src/llama.cpp/common/log.cpp +401 -0
  36. package/src/llama.cpp/common/log.h +66 -698
  37. package/src/llama.cpp/common/ngram-cache.cpp +3 -0
  38. package/src/llama.cpp/common/sampling.cpp +348 -350
  39. package/src/llama.cpp/common/sampling.h +62 -139
  40. package/src/llama.cpp/common/stb_image.h +5990 -6398
  41. package/src/llama.cpp/common/train.cpp +2 -0
  42. package/src/llama.cpp/docs/build.md +36 -1
  43. package/src/llama.cpp/examples/CMakeLists.txt +0 -1
  44. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +1 -2
  45. package/src/llama.cpp/examples/batched/batched.cpp +39 -55
  46. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +34 -44
  47. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +55 -52
  48. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +15 -15
  49. package/src/llama.cpp/examples/cvector-generator/pca.hpp +3 -13
  50. package/src/llama.cpp/examples/embedding/embedding.cpp +143 -87
  51. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +33 -33
  52. package/src/llama.cpp/examples/export-lora/export-lora.cpp +36 -35
  53. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +14 -39
  54. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +5 -0
  55. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +83 -0
  56. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +58 -39
  57. package/src/llama.cpp/examples/gritlm/gritlm.cpp +34 -27
  58. package/src/llama.cpp/examples/imatrix/imatrix.cpp +59 -62
  59. package/src/llama.cpp/examples/infill/infill.cpp +117 -132
  60. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +265 -58
  61. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +29 -22
  62. package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
  63. package/src/llama.cpp/examples/llava/clip.cpp +685 -150
  64. package/src/llama.cpp/examples/llava/clip.h +11 -2
  65. package/src/llama.cpp/examples/llava/llava-cli.cpp +47 -58
  66. package/src/llama.cpp/examples/llava/llava.cpp +110 -24
  67. package/src/llama.cpp/examples/llava/llava.h +2 -3
  68. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +323 -0
  69. package/src/llama.cpp/examples/llava/requirements.txt +1 -0
  70. package/src/llama.cpp/examples/lookahead/lookahead.cpp +42 -43
  71. package/src/llama.cpp/examples/lookup/lookup-create.cpp +10 -8
  72. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +23 -22
  73. package/src/llama.cpp/examples/lookup/lookup.cpp +40 -43
  74. package/src/llama.cpp/examples/main/main.cpp +210 -262
  75. package/src/llama.cpp/examples/parallel/parallel.cpp +49 -49
  76. package/src/llama.cpp/examples/passkey/passkey.cpp +42 -50
  77. package/src/llama.cpp/examples/perplexity/perplexity.cpp +187 -200
  78. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  79. package/src/llama.cpp/examples/quantize/quantize.cpp +27 -9
  80. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +2 -3
  81. package/src/llama.cpp/examples/retrieval/retrieval.cpp +49 -44
  82. package/src/llama.cpp/examples/rpc/rpc-server.cpp +24 -1
  83. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +32 -35
  84. package/src/llama.cpp/examples/server/CMakeLists.txt +3 -5
  85. package/src/llama.cpp/examples/server/server.cpp +1027 -1073
  86. package/src/llama.cpp/examples/server/tests/requirements.txt +2 -1
  87. package/src/llama.cpp/examples/server/utils.hpp +107 -105
  88. package/src/llama.cpp/examples/simple/simple.cpp +35 -41
  89. package/src/llama.cpp/examples/speculative/speculative.cpp +129 -103
  90. package/src/llama.cpp/examples/sycl/run-llama2.sh +10 -19
  91. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
  92. package/src/llama.cpp/examples/tokenize/tokenize.cpp +25 -27
  93. package/src/llama.cpp/ggml/CMakeLists.txt +14 -3
  94. package/src/llama.cpp/ggml/include/ggml-alloc.h +3 -3
  95. package/src/llama.cpp/ggml/include/ggml-backend.h +145 -60
  96. package/src/llama.cpp/ggml/include/ggml-blas.h +3 -3
  97. package/src/llama.cpp/ggml/include/ggml-cann.h +15 -19
  98. package/src/llama.cpp/ggml/include/ggml-cuda.h +16 -16
  99. package/src/llama.cpp/ggml/include/ggml-metal.h +5 -8
  100. package/src/llama.cpp/ggml/include/ggml-rpc.h +5 -5
  101. package/src/llama.cpp/ggml/include/ggml-sycl.h +8 -8
  102. package/src/llama.cpp/ggml/include/ggml-vulkan.h +7 -7
  103. package/src/llama.cpp/ggml/include/ggml.h +293 -186
  104. package/src/llama.cpp/ggml/src/CMakeLists.txt +86 -44
  105. package/src/llama.cpp/ggml/src/ggml-aarch64.c +2135 -1119
  106. package/src/llama.cpp/ggml/src/ggml-alloc.c +6 -0
  107. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +152 -70
  108. package/src/llama.cpp/ggml/src/{ggml-backend.c → ggml-backend.cpp} +606 -286
  109. package/src/llama.cpp/ggml/src/ggml-blas.cpp +9 -10
  110. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +4 -27
  111. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +32 -4
  112. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +179 -41
  113. package/src/llama.cpp/ggml/src/ggml-cann/common.h +1 -0
  114. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -1
  115. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +2 -0
  116. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +278 -0
  117. package/src/llama.cpp/ggml/src/ggml-cann.cpp +215 -216
  118. package/src/llama.cpp/ggml/src/ggml-common.h +20 -0
  119. package/src/llama.cpp/ggml/src/ggml-cpu-impl.h +614 -0
  120. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
  121. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +178 -0
  122. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +134 -0
  123. package/src/llama.cpp/ggml/src/ggml-impl.h +49 -603
  124. package/src/llama.cpp/ggml/src/ggml-kompute.cpp +4 -24
  125. package/src/llama.cpp/ggml/src/ggml-quants.c +972 -92
  126. package/src/llama.cpp/ggml/src/ggml-quants.h +15 -0
  127. package/src/llama.cpp/ggml/src/ggml-rpc.cpp +116 -66
  128. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +3 -0
  129. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +11 -0
  130. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +52 -0
  131. package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +99 -0
  132. package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +21 -0
  133. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +57 -57
  134. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +1 -1
  135. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +106 -106
  136. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +4 -4
  137. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +16 -3
  138. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +101 -0
  139. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +125 -0
  140. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +23 -0
  141. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +1 -1
  142. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +6 -3
  143. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +2 -0
  144. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -1
  145. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +71 -0
  146. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +21 -0
  147. package/src/llama.cpp/ggml/src/ggml-sycl.cpp +97 -169
  148. package/src/llama.cpp/ggml/src/ggml-vulkan.cpp +1508 -1124
  149. package/src/llama.cpp/ggml/src/ggml.c +3001 -1647
  150. package/src/llama.cpp/ggml/src/llamafile/sgemm.cpp +192 -0
  151. package/src/llama.cpp/ggml/src/vulkan-shaders/CMakeLists.txt +2 -0
  152. package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +88 -40
  153. package/src/llama.cpp/include/llama.h +241 -264
  154. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +112 -0
  155. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +46 -0
  156. package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +1 -1
  157. package/src/llama.cpp/src/llama-grammar.cpp +721 -122
  158. package/src/llama.cpp/src/llama-grammar.h +120 -15
  159. package/src/llama.cpp/src/llama-impl.h +156 -1
  160. package/src/llama.cpp/src/llama-sampling.cpp +1375 -303
  161. package/src/llama.cpp/src/llama-sampling.h +20 -47
  162. package/src/llama.cpp/src/llama-vocab.cpp +343 -120
  163. package/src/llama.cpp/src/llama-vocab.h +33 -17
  164. package/src/llama.cpp/src/llama.cpp +4247 -1525
  165. package/src/llama.cpp/src/unicode-data.cpp +6 -4
  166. package/src/llama.cpp/src/unicode-data.h +4 -4
  167. package/src/llama.cpp/src/unicode.cpp +15 -7
  168. package/src/llama.cpp/tests/CMakeLists.txt +3 -0
  169. package/src/llama.cpp/tests/test-arg-parser.cpp +131 -0
  170. package/src/llama.cpp/tests/test-backend-ops.cpp +1592 -289
  171. package/src/llama.cpp/tests/test-barrier.cpp +93 -0
  172. package/src/llama.cpp/tests/test-grad0.cpp +187 -70
  173. package/src/llama.cpp/tests/test-grammar-integration.cpp +23 -38
  174. package/src/llama.cpp/tests/test-grammar-parser.cpp +6 -4
  175. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +6 -4
  176. package/src/llama.cpp/tests/test-llama-grammar.cpp +9 -8
  177. package/src/llama.cpp/tests/test-log.cpp +39 -0
  178. package/src/llama.cpp/tests/test-quantize-fns.cpp +6 -0
  179. package/src/llama.cpp/tests/test-rope.cpp +1 -1
  180. package/src/llama.cpp/tests/test-sampling.cpp +157 -98
  181. package/src/llama.cpp/tests/test-tokenizer-0.cpp +55 -35
  182. package/patches/llama.patch +0 -22
  183. package/src/llama.cpp/.github/workflows/bench.yml +0 -310
  184. package/src/llama.cpp/common/grammar-parser.cpp +0 -536
  185. package/src/llama.cpp/common/grammar-parser.h +0 -29
  186. package/src/llama.cpp/examples/benchmark/CMakeLists.txt +0 -6
  187. package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -275
--- a/package/src/llama.cpp/common/sampling.h
+++ b/package/src/llama.cpp/common/sampling.h
@@ -2,159 +2,82 @@
 
 #include "llama.h"
 
-#include "grammar-parser.h"
+#include "common.h"
 
-#include <random>
 #include <string>
-#include <unordered_map>
 #include <vector>
 
-// sampler types
-enum class llama_sampler_type : char {
-    TOP_K       = 'k',
-    TOP_P       = 'p',
-    MIN_P       = 'm',
-    TFS_Z       = 'f',
-    TYPICAL_P   = 'y',
-    TEMPERATURE = 't'
-};
-
-// sampling parameters
-typedef struct llama_sampling_params {
-    int32_t     n_prev            = 64;    // number of previous tokens to remember
-    int32_t     n_probs           = 0;     // if greater than 0, output the probabilities of top n_probs tokens.
-    int32_t     min_keep          = 0;     // 0 = disabled, otherwise samplers should return at least min_keep tokens
-    int32_t     top_k             = 40;    // <= 0 to use vocab size
-    float       top_p             = 0.95f; // 1.0 = disabled
-    float       min_p             = 0.05f; // 0.0 = disabled
-    float       tfs_z             = 1.00f; // 1.0 = disabled
-    float       typical_p         = 1.00f; // 1.0 = disabled
-    float       temp              = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities
-    float       dynatemp_range    = 0.00f; // 0.0 = disabled
-    float       dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler
-    int32_t     penalty_last_n    = 64;    // last n tokens to penalize (0 = disable penalty, -1 = context size)
-    float       penalty_repeat    = 1.00f; // 1.0 = disabled
-    float       penalty_freq      = 0.00f; // 0.0 = disabled
-    float       penalty_present   = 0.00f; // 0.0 = disabled
-    int32_t     mirostat          = 0;     // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
-    float       mirostat_tau      = 5.00f; // target entropy
-    float       mirostat_eta      = 0.10f; // learning rate
-    bool        penalize_nl       = false; // consider newlines as a repeatable token
-    uint32_t    seed              = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampling_context
-
-    std::vector<llama_sampler_type> samplers_sequence = {
-        llama_sampler_type::TOP_K,
-        llama_sampler_type::TFS_Z,
-        llama_sampler_type::TYPICAL_P,
-        llama_sampler_type::TOP_P,
-        llama_sampler_type::MIN_P,
-        llama_sampler_type::TEMPERATURE
-    };
-
-    std::string grammar; // optional BNF-like grammar to constrain sampling
-
-    // Classifier-Free Guidance
-    // https://arxiv.org/abs/2306.17806
-    std::string cfg_negative_prompt; // string to help guidance
-    float       cfg_scale = 1.f;     // how strong is guidance
-
-    std::unordered_map<llama_token, float> logit_bias; // logit bias for specific tokens
-
-    std::vector<llama_token> penalty_prompt_tokens;
-    bool                     use_penalty_prompt_tokens = false;
-} llama_sampling_params;
-
-// general sampler context
-// TODO: move to llama.h
-struct llama_sampling_context {
-    // parameters that will be used for sampling
-    llama_sampling_params params;
-
-    // mirostat sampler state
-    float mirostat_mu;
-
-    llama_grammar * grammar;
-
-    // internal
-    grammar_parser::parse_state parsed_grammar;
-
-    // TODO: replace with ring-buffer
-    std::vector<llama_token>      prev;
-    std::vector<llama_token_data> cur;
-    size_t n_valid; // Number of correct top tokens with correct probabilities.
-
-    std::mt19937 rng;
-};
+// gpt_sampler extends llama_sampler with additional functionality:
+//
+//  - grammar support
+//  - custom sampler logic based on the parameters
+//  - history of the last accepted tokens
+//  - performance metrics
+//
+// The goal is to have a common implementation of the sampling logic shared across the examples.
+// For example, depending on the temperature, the sampling chain can be very simple (greedy) or more
+// complex (top-k, top-p, etc).
+//
+// Another example is related to the grammar. In general, the grammar constraints applied on the full
+// vocabulary can be very taxing. To improve performance, the grammar can be applied only to the sampled
+// token in order to verify if it fits the grammar. And only if the token doesn't fit the grammar, the
+// grammar constraints are applied to the full vocabulary and the token is resampled.
+//
+// The gpt_sampler also maintains a container with the last accepted tokens. In the future, this can
+// be moved into the core llama library.
+//
+// For convenience, the gpt_sampler also maintains a container with the current candidate tokens.
+// This can be used to access the probabilities of the rest of the non-sampled tokens.
+//
+// TODO: measure grammar performance
+//
 
-#include "common.h"
+struct gpt_sampler;
 
-// Create a new sampling context instance.
-struct llama_sampling_context * llama_sampling_init(const struct llama_sampling_params & params);
+// llama_sampler API overloads
 
-void llama_sampling_free(struct llama_sampling_context * ctx);
+struct gpt_sampler * gpt_sampler_init(const struct llama_model * model, const struct gpt_sampler_params & params);
 
-// Reset the sampler context
-// - clear prev tokens
-// - reset grammar
-void llama_sampling_reset(llama_sampling_context * ctx);
+void gpt_sampler_free(struct gpt_sampler * gsmpl);
 
-// Set the sampler seed
-void llama_sampling_set_rng_seed(struct llama_sampling_context * ctx, uint32_t seed);
+// if accept_grammar is true, the token is accepted both by the sampling chain and the grammar
+void gpt_sampler_accept(struct gpt_sampler * gsmpl, llama_token token, bool accept_grammar);
+void gpt_sampler_reset (struct gpt_sampler * gsmpl);
+struct gpt_sampler * gpt_sampler_clone (struct gpt_sampler * gsmpl);
 
-// Copy the sampler context
-void llama_sampling_cp(llama_sampling_context * src, llama_sampling_context * dst);
+// arguments can be nullptr to skip printing
+void gpt_perf_print(const struct llama_context * ctx, const struct gpt_sampler * gsmpl);
 
-// Get the last sampled token
-llama_token llama_sampling_last(llama_sampling_context * ctx);
+// extended sampling implementation:
+//
+// - set logits
+// - apply the configured sampler chain
+// - check if the token fits the grammar (if any)
+// - if not: resample by first applying the grammar constraints and then sampling again (slower path)
+//
+// if grammar_first is true, the grammar is applied before the samplers (slower)
+// useful in cases where all the resulting candidates (not just the sampled one) must fit the grammar
+//
+llama_token gpt_sampler_sample(struct gpt_sampler * gsmpl, struct llama_context * ctx, int idx, bool grammar_first = false);
 
-// Get a string representation of the last sampled tokens
-std::string llama_sampling_prev_str(llama_sampling_context * ctx_sampling, llama_context * ctx_main, int n);
+uint32_t gpt_sampler_get_seed(const struct gpt_sampler * gsmpl);
 
-// Print sampling parameters into a string
-std::string llama_sampling_print(const llama_sampling_params & params);
+// helpers
 
-// Print sampling order into a string
-std::string llama_sampling_order_print(const llama_sampling_params & params);
+// access the internal list of current candidate tokens
+llama_token_data_array * gpt_sampler_get_candidates(struct gpt_sampler * gsmpl);
 
-std::string llama_sampling_type_to_str(llama_sampler_type sampler_type);
+// get the last accepted token
+llama_token gpt_sampler_last(const struct gpt_sampler * gsmpl);
 
-std::vector<llama_sampler_type> llama_sampling_types_from_names(const std::vector<std::string> & names, bool allow_alt_names);
-std::vector<llama_sampler_type> llama_sampling_types_from_chars(const std::string & names_string);
+// print the sampler chain into a string
+std::string gpt_sampler_print(const struct gpt_sampler * gsmpl);
 
-// this is a common sampling function used across the examples for convenience
-// it can serve as a starting point for implementing your own sampling function
-// Note: When using multiple sequences, it is the caller's responsibility to call
-//       llama_sampling_reset when a sequence ends
-//
-// required:
-//  - ctx_main:     context to use for sampling
-//  - ctx_sampling: sampling-specific context
-//
-// optional:
-//  - ctx_cfg: context to use for classifier-free guidance
-//  - idx:     sample from llama_get_logits_ith(ctx, idx)
-//
-// returns:
-//  - token:      sampled token
-//  - candidates: vector of candidate tokens
-//
-llama_token llama_sampling_sample(
-        struct llama_sampling_context * ctx_sampling,
-        struct llama_context * ctx_main,
-        struct llama_context * ctx_cfg,
-        int idx = -1);
-
-// Prepares and adjusts the set of token candidates for sampling based on penalties, biases, and sampling parameters.
-llama_token_data_array llama_sampling_prepare(
-        struct llama_sampling_context * ctx_sampling,
-        struct llama_context * ctx_main,
-        struct llama_context * ctx_cfg,
-        int idx = 0,
-        bool apply_grammar = true,
-        std::vector<float> * original_logits = nullptr);
-
-void llama_sampling_accept(
-        struct llama_sampling_context * ctx_sampling,
-        struct llama_context * ctx_main,
-        llama_token id,
-        bool apply_grammar);
+// get a string representation of the last accepted tokens
+std::string gpt_sampler_prev_str(gpt_sampler * gsmpl, llama_context * ctx, int n);
+
+char        gpt_sampler_type_to_chr(enum gpt_sampler_type cnstr);
+std::string gpt_sampler_type_to_str(enum gpt_sampler_type cnstr);
+
+std::vector<enum gpt_sampler_type> gpt_sampler_types_from_names(const std::vector<std::string> & names, bool allow_alt_names);
+std::vector<enum gpt_sampler_type> gpt_sampler_types_from_chars(const std::string & chars);
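
The header above replaces the old llama_sampling_* free functions with the gpt_sampler object; the small diffs to package/src/LlamaCompletionWorker.cpp and package/src/LlamaContext.cpp listed earlier are consistent with call sites being updated to this API. Below is a minimal sketch of the new call pattern, using only the declarations shown in the diff. It is illustrative, not this package's actual code: the caller function, and the assumption that a llama_model, llama_context, and populated gpt_sampler_params (declared in common.h) already exist, are hypothetical, and the decode step and end-of-generation check are elided.

    #include "common.h"
    #include "sampling.h"

    // Hypothetical caller: `model`, `ctx`, and `sparams` are assumed to be
    // set up elsewhere (e.g. via the new common/arg.cpp argument parser).
    static void run_sampling_loop(llama_model * model, llama_context * ctx,
                                  const gpt_sampler_params & sparams, int n_predict) {
        gpt_sampler * gsmpl = gpt_sampler_init(model, sparams);

        for (int i = 0; i < n_predict; ++i) {
            // sample from the logits of the last decoded token (idx = -1);
            // grammar checking and any resampling happen inside the call
            const llama_token id = gpt_sampler_sample(gsmpl, ctx, /* idx = */ -1);

            // record the token in the sampler history and the grammar state
            gpt_sampler_accept(gsmpl, id, /* accept_grammar = */ true);

            // probabilities of the non-sampled candidates remain accessible:
            // llama_token_data_array * cur = gpt_sampler_get_candidates(gsmpl);

            // ... llama_decode(...) the token and break on end-of-generation ...
        }

        gpt_perf_print(ctx, gsmpl); // performance metrics; nullptr args skip printing
        gpt_sampler_free(gsmpl);
    }

Note the shape of the migration: the old API threaded a llama_sampling_context plus an optional classifier-free-guidance context through llama_sampling_sample/llama_sampling_accept, while the new one bundles the sampler chain, grammar, and token history into a single gpt_sampler, with grammar resampling handled internally (or applied up front via the grammar_first flag when every candidate must satisfy the grammar).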