@fugood/llama.node 1.4.15 → 1.5.0-rc.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47):
  1. package/lib/binding.ts +1 -5
  2. package/lib/index.js +2 -2
  3. package/lib/index.ts +2 -2
  4. package/package.json +15 -15
  5. package/scripts/llama.cpp.patch +76 -61
  6. package/src/LlamaContext.cpp +20 -32
  7. package/src/llama.cpp/common/CMakeLists.txt +12 -0
  8. package/src/llama.cpp/common/arg.cpp +20 -0
  9. package/src/llama.cpp/common/chat.cpp +289 -34
  10. package/src/llama.cpp/common/chat.h +16 -13
  11. package/src/llama.cpp/common/common.cpp +0 -1
  12. package/src/llama.cpp/common/common.h +28 -25
  13. package/src/llama.cpp/common/jinja/caps.cpp +237 -0
  14. package/src/llama.cpp/common/jinja/caps.h +24 -0
  15. package/src/llama.cpp/common/jinja/lexer.cpp +341 -0
  16. package/src/llama.cpp/common/jinja/lexer.h +157 -0
  17. package/src/llama.cpp/common/jinja/parser.cpp +591 -0
  18. package/src/llama.cpp/common/jinja/parser.h +21 -0
  19. package/src/llama.cpp/common/jinja/runtime.cpp +865 -0
  20. package/src/llama.cpp/common/jinja/runtime.h +628 -0
  21. package/src/llama.cpp/common/jinja/string.cpp +207 -0
  22. package/src/llama.cpp/common/jinja/string.h +58 -0
  23. package/src/llama.cpp/common/jinja/utils.h +49 -0
  24. package/src/llama.cpp/common/jinja/value.cpp +1221 -0
  25. package/src/llama.cpp/common/jinja/value.h +464 -0
  26. package/src/llama.cpp/common/sampling.cpp +52 -19
  27. package/src/llama.cpp/ggml/include/ggml.h +39 -7
  28. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +4 -0
  29. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +63 -37
  30. package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +31 -0
  31. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +18 -0
  32. package/src/llama.cpp/include/llama-cpp.h +3 -1
  33. package/src/llama.cpp/include/llama.h +29 -2
  34. package/src/llama.cpp/src/llama-adapter.cpp +7 -13
  35. package/src/llama.cpp/src/llama-adapter.h +1 -3
  36. package/src/llama.cpp/src/llama-context.cpp +232 -144
  37. package/src/llama.cpp/src/llama-context.h +10 -0
  38. package/src/llama.cpp/src/llama-cparams.h +2 -0
  39. package/src/llama.cpp/src/llama-hparams.cpp +0 -36
  40. package/src/llama.cpp/src/llama-hparams.h +38 -1
  41. package/src/llama.cpp/src/llama-kv-cache.cpp +201 -59
  42. package/src/llama.cpp/src/llama-kv-cache.h +0 -2
  43. package/src/llama.cpp/src/llama-mmap.cpp +5 -1
  44. package/src/llama.cpp/src/llama-model-loader.cpp +21 -7
  45. package/src/llama.cpp/src/llama-model.cpp +5 -1
  46. package/src/llama.cpp/src/llama-model.h +3 -2
  47. package/src/llama.cpp/src/llama-sampling.cpp +170 -13
@@ -0,0 +1,464 @@
1
+ #pragma once
2
+
3
+ #include "string.h"
4
+
5
+ #include <algorithm>
6
+ #include <cstdint>
7
+ #include <functional>
8
+ #include <map>
9
+ #include <memory>
10
+ #include <set>
11
+ #include <sstream>
12
+ #include <string>
13
+ #include <vector>
14
+
15
+ namespace jinja {
16
+
17
+ struct value_t;
18
+ using value = std::shared_ptr<value_t>;
19
+
20
+
21
+ // Helper to check the type of a value
// Type trait used by the value helpers: maps a handle type to the type it
// refers to. std::shared_ptr<U> yields U; every other type yields itself.
template<typename T>
struct extract_pointee {
    typedef T type;
};

// Specialization that unwraps a std::shared_ptr.
template<typename Pointee>
struct extract_pointee<std::shared_ptr<Pointee>> {
    typedef Pointee type;
};
30
+ template<typename T>
31
+ bool is_val(const value & ptr) {
32
+ using PointeeType = typename extract_pointee<T>::type;
33
+ return dynamic_cast<const PointeeType*>(ptr.get()) != nullptr;
34
+ }
35
+ template<typename T>
36
+ bool is_val(const value_t * ptr) {
37
+ using PointeeType = typename extract_pointee<T>::type;
38
+ return dynamic_cast<const PointeeType*>(ptr) != nullptr;
39
+ }
40
+ template<typename T, typename... Args>
41
+ std::shared_ptr<typename extract_pointee<T>::type> mk_val(Args&&... args) {
42
+ using PointeeType = typename extract_pointee<T>::type;
43
+ return std::make_shared<PointeeType>(std::forward<Args>(args)...);
44
+ }
45
+ template<typename T>
46
+ const typename extract_pointee<T>::type * cast_val(const value & ptr) {
47
+ using PointeeType = typename extract_pointee<T>::type;
48
+ return dynamic_cast<const PointeeType*>(ptr.get());
49
+ }
50
+ template<typename T>
51
+ typename extract_pointee<T>::type * cast_val(value & ptr) {
52
+ using PointeeType = typename extract_pointee<T>::type;
53
+ return dynamic_cast<PointeeType*>(ptr.get());
54
+ }
55
+ // End Helper
56
+
57
+
58
+ struct context; // forward declaration
59
+
60
+
61
+ // for converting from JSON to jinja values
62
+ // example input JSON:
63
+ // {
64
+ // "messages": [
65
+ // {"role": "user", "content": "Hello!"},
66
+ // {"role": "assistant", "content": "Hi there!"}
67
+ // ],
68
+ // "bos_token": "<s>",
69
+ // "eos_token": "</s>",
70
+ // }
71
+ //
72
+ // to mark strings as user input, wrap them in a special object:
73
+ // {
74
+ // "messages": [
75
+ // {
76
+ // "role": "user",
77
+ // "content": {"__input__": "Hello!"} // this string is user input
78
+ // },
79
+ // ...
80
+ // ],
81
+ // }
82
+ //
83
+ // marking input can be useful for tracking data provenance
84
+ // and preventing template injection attacks
85
+ //
86
+ // Note: T_JSON can be nlohmann::ordered_json
87
+ template<typename T_JSON>
88
+ void global_from_json(context & ctx, const T_JSON & json_obj, bool mark_input);
89
+
90
+ //
91
+ // base value type
92
+ //
93
+
94
+ struct func_args; // function argument values
95
+
96
+ using func_handler = std::function<value(const func_args &)>;
97
+ using func_builtins = std::map<std::string, func_handler>;
98
+
99
+ enum value_compare_op { eq, ge, gt, lt, ne };
100
+ bool value_compare(const value & a, const value & b, value_compare_op op);
101
+
102
+ struct value_t {
103
+ int64_t val_int;
104
+ double val_flt;
105
+ string val_str;
106
+ bool val_bool;
107
+
108
+ std::vector<value> val_arr;
109
+
110
+ struct map {
111
+ // once set to true, all keys must be numeric
112
+ // caveat: we only allow either all numeric keys or all non-numeric keys
113
+ // for now, this only applied to for_statement in case of iterating over object keys/items
114
+ bool is_key_numeric = false;
115
+ std::map<std::string, value> unordered;
116
+ std::vector<std::pair<std::string, value>> ordered;
117
+ void insert(const std::string & key, const value & val) {
118
+ if (unordered.find(key) != unordered.end()) {
119
+ // if key exists, remove from ordered list
120
+ ordered.erase(std::remove_if(ordered.begin(), ordered.end(),
121
+ [&](const std::pair<std::string, value> & p) { return p.first == key; }),
122
+ ordered.end());
123
+ }
124
+ unordered[key] = val;
125
+ ordered.push_back({key, val});
126
+ }
127
+ } val_obj;
128
+
129
+ func_handler val_func;
130
+
131
+ // only used if ctx.is_get_stats = true
132
+ struct stats_t {
133
+ bool used = false;
134
+ // ops can be builtin calls or operators: "array_access", "object_access"
135
+ std::set<std::string> ops;
136
+ } stats;
137
+
138
+ value_t() = default;
139
+ value_t(const value_t &) = default;
140
+ virtual ~value_t() = default;
141
+
142
+ virtual std::string type() const { return ""; }
143
+
144
+ virtual int64_t as_int() const { throw std::runtime_error(type() + " is not an int value"); }
145
+ virtual double as_float() const { throw std::runtime_error(type() + " is not a float value"); }
146
+ virtual string as_string() const { throw std::runtime_error(type() + " is not a string value"); }
147
+ virtual bool as_bool() const { throw std::runtime_error(type() + " is not a bool value"); }
148
+ virtual const std::vector<value> & as_array() const { throw std::runtime_error(type() + " is not an array value"); }
149
+ virtual const std::vector<std::pair<std::string, value>> & as_ordered_object() const { throw std::runtime_error(type() + " is not an object value"); }
150
+ virtual value invoke(const func_args &) const { throw std::runtime_error(type() + " is not a function value"); }
151
+ virtual bool is_none() const { return false; }
152
+ virtual bool is_undefined() const { return false; }
153
+ virtual const func_builtins & get_builtins() const {
154
+ throw std::runtime_error("No builtins available for type " + type());
155
+ }
156
+
157
+ virtual bool has_key(const std::string & key) {
158
+ return val_obj.unordered.find(key) != val_obj.unordered.end();
159
+ }
160
+ virtual value & at(const std::string & key, value & default_val) {
161
+ auto it = val_obj.unordered.find(key);
162
+ if (it == val_obj.unordered.end()) {
163
+ return default_val;
164
+ }
165
+ return val_obj.unordered.at(key);
166
+ }
167
+ virtual value & at(const std::string & key) {
168
+ auto it = val_obj.unordered.find(key);
169
+ if (it == val_obj.unordered.end()) {
170
+ throw std::runtime_error("Key '" + key + "' not found in value of type " + type());
171
+ }
172
+ return val_obj.unordered.at(key);
173
+ }
174
+ virtual value & at(int64_t index, value & default_val) {
175
+ if (index < 0) {
176
+ index += val_arr.size();
177
+ }
178
+ if (index < 0 || static_cast<size_t>(index) >= val_arr.size()) {
179
+ return default_val;
180
+ }
181
+ return val_arr[index];
182
+ }
183
+ virtual value & at(int64_t index) {
184
+ if (index < 0) {
185
+ index += val_arr.size();
186
+ }
187
+ if (index < 0 || static_cast<size_t>(index) >= val_arr.size()) {
188
+ throw std::runtime_error("Index " + std::to_string(index) + " out of bounds for array of size " + std::to_string(val_arr.size()));
189
+ }
190
+ return val_arr[index];
191
+ }
192
+
193
+ virtual std::string as_repr() const { return as_string().str(); }
194
+ };
195
+
196
+ //
197
+ // primitive value types
198
+ //
199
+
200
+ struct value_int_t : public value_t {
201
+ value_int_t(int64_t v) { val_int = v; }
202
+ virtual std::string type() const override { return "Integer"; }
203
+ virtual int64_t as_int() const override { return val_int; }
204
+ virtual double as_float() const override { return static_cast<double>(val_int); }
205
+ virtual string as_string() const override { return std::to_string(val_int); }
206
+ virtual bool as_bool() const override {
207
+ return val_int != 0;
208
+ }
209
+ virtual const func_builtins & get_builtins() const override;
210
+ };
211
+ using value_int = std::shared_ptr<value_int_t>;
212
+
213
+
214
+ struct value_float_t : public value_t {
215
+ value_float_t(double v) { val_flt = v; }
216
+ virtual std::string type() const override { return "Float"; }
217
+ virtual double as_float() const override { return val_flt; }
218
+ virtual int64_t as_int() const override { return static_cast<int64_t>(val_flt); }
219
+ virtual string as_string() const override {
220
+ std::string out = std::to_string(val_flt);
221
+ out.erase(out.find_last_not_of('0') + 1, std::string::npos); // remove trailing zeros
222
+ if (out.back() == '.') out.push_back('0'); // leave one zero if no decimals
223
+ return out;
224
+ }
225
+ virtual bool as_bool() const override {
226
+ return val_flt != 0.0;
227
+ }
228
+ virtual const func_builtins & get_builtins() const override;
229
+ };
230
+ using value_float = std::shared_ptr<value_float_t>;
231
+
232
+
233
+ struct value_string_t : public value_t {
234
+ value_string_t() { val_str = string(); }
235
+ value_string_t(const std::string & v) { val_str = string(v); }
236
+ value_string_t(const string & v) { val_str = v; }
237
+ virtual std::string type() const override { return "String"; }
238
+ virtual string as_string() const override { return val_str; }
239
+ virtual std::string as_repr() const override {
240
+ std::ostringstream ss;
241
+ for (const auto & part : val_str.parts) {
242
+ ss << (part.is_input ? "INPUT: " : "TMPL: ") << part.val << "\n";
243
+ }
244
+ return ss.str();
245
+ }
246
+ virtual bool as_bool() const override {
247
+ return val_str.length() > 0;
248
+ }
249
+ virtual const func_builtins & get_builtins() const override;
250
+ void mark_input() {
251
+ val_str.mark_input();
252
+ }
253
+ };
254
+ using value_string = std::shared_ptr<value_string_t>;
255
+
256
+
257
+ struct value_bool_t : public value_t {
258
+ value_bool_t(bool v) { val_bool = v; }
259
+ virtual std::string type() const override { return "Boolean"; }
260
+ virtual bool as_bool() const override { return val_bool; }
261
+ virtual string as_string() const override { return std::string(val_bool ? "True" : "False"); }
262
+ virtual const func_builtins & get_builtins() const override;
263
+ };
264
+ using value_bool = std::shared_ptr<value_bool_t>;
265
+
266
+
267
+ struct value_array_t : public value_t {
268
+ value_array_t() = default;
269
+ value_array_t(value & v) {
270
+ val_arr = v->val_arr;
271
+ }
272
+ value_array_t(const std::vector<value> & arr) {
273
+ val_arr = arr;
274
+ }
275
+ void reverse() { std::reverse(val_arr.begin(), val_arr.end()); }
276
+ void push_back(const value & val) { val_arr.push_back(val); }
277
+ void push_back(value && val) { val_arr.push_back(std::move(val)); }
278
+ value pop_at(int64_t index) {
279
+ if (index < 0) {
280
+ index = static_cast<int64_t>(val_arr.size()) + index;
281
+ }
282
+ if (index < 0 || index >= static_cast<int64_t>(val_arr.size())) {
283
+ throw std::runtime_error("Index " + std::to_string(index) + " out of bounds for array of size " + std::to_string(val_arr.size()));
284
+ }
285
+ value val = val_arr.at(static_cast<size_t>(index));
286
+ val_arr.erase(val_arr.begin() + index);
287
+ return val;
288
+ }
289
+ virtual std::string type() const override { return "Array"; }
290
+ virtual const std::vector<value> & as_array() const override { return val_arr; }
291
+ virtual string as_string() const override {
292
+ std::ostringstream ss;
293
+ ss << "[";
294
+ for (size_t i = 0; i < val_arr.size(); i++) {
295
+ if (i > 0) ss << ", ";
296
+ ss << val_arr.at(i)->as_repr();
297
+ }
298
+ ss << "]";
299
+ return ss.str();
300
+ }
301
+ virtual bool as_bool() const override {
302
+ return !val_arr.empty();
303
+ }
304
+ virtual const func_builtins & get_builtins() const override;
305
+ };
306
+ using value_array = std::shared_ptr<value_array_t>;
307
+
308
+
309
+ struct value_object_t : public value_t {
310
+ bool has_builtins = true; // context and loop objects do not have builtins
311
+ value_object_t() = default;
312
+ value_object_t(value & v) {
313
+ val_obj = v->val_obj;
314
+ }
315
+ value_object_t(const std::map<std::string, value> & obj) {
316
+ for (const auto & pair : obj) {
317
+ val_obj.insert(pair.first, pair.second);
318
+ }
319
+ }
320
+ value_object_t(const std::vector<std::pair<std::string, value>> & obj) {
321
+ for (const auto & pair : obj) {
322
+ val_obj.insert(pair.first, pair.second);
323
+ }
324
+ }
325
+ void insert(const std::string & key, const value & val) {
326
+ val_obj.insert(key, val);
327
+ }
328
+ virtual std::string type() const override { return "Object"; }
329
+ virtual const std::vector<std::pair<std::string, value>> & as_ordered_object() const override { return val_obj.ordered; }
330
+ virtual bool as_bool() const override {
331
+ return !val_obj.unordered.empty();
332
+ }
333
+ virtual const func_builtins & get_builtins() const override;
334
+ };
335
+ using value_object = std::shared_ptr<value_object_t>;
336
+
337
+ //
338
+ // null and undefined types
339
+ //
340
+
341
+ struct value_none_t : public value_t {
342
+ virtual std::string type() const override { return "None"; }
343
+ virtual bool is_none() const override { return true; }
344
+ virtual bool as_bool() const override { return false; }
345
+ virtual std::string as_repr() const override { return type(); }
346
+ virtual const func_builtins & get_builtins() const override;
347
+ };
348
+ using value_none = std::shared_ptr<value_none_t>;
349
+
350
+
351
+ struct value_undefined_t : public value_t {
352
+ std::string hint; // for debugging, to indicate where undefined came from
353
+ value_undefined_t(const std::string & h = "") : hint(h) {}
354
+ virtual std::string type() const override { return hint.empty() ? "Undefined" : "Undefined (hint: '" + hint + "')"; }
355
+ virtual bool is_undefined() const override { return true; }
356
+ virtual bool as_bool() const override { return false; }
357
+ virtual std::string as_repr() const override { return type(); }
358
+ virtual const func_builtins & get_builtins() const override;
359
+ };
360
+ using value_undefined = std::shared_ptr<value_undefined_t>;
361
+
362
+ //
363
+ // function type
364
+ //
365
+
366
+ struct func_args {
367
+ public:
368
+ std::string func_name; // for error messages
369
+ context & ctx;
370
+ func_args(context & ctx) : ctx(ctx) {}
371
+ value get_kwarg(const std::string & key, value default_val) const;
372
+ value get_kwarg_or_pos(const std::string & key, size_t pos) const;
373
+ value get_pos(size_t pos) const;
374
+ value get_pos(size_t pos, value default_val) const;
375
+ const std::vector<value> & get_args() const;
376
+ size_t count() const { return args.size(); }
377
+ void push_back(const value & val);
378
+ void push_front(const value & val);
379
+ void ensure_count(size_t min, size_t max = 999) const {
380
+ size_t n = args.size();
381
+ if (n < min || n > max) {
382
+ throw std::runtime_error("Function '" + func_name + "' expected between " + std::to_string(min) + " and " + std::to_string(max) + " arguments, got " + std::to_string(n));
383
+ }
384
+ }
385
+ template<typename T> void ensure_val(const value & ptr) const {
386
+ if (!is_val<T>(ptr)) {
387
+ throw std::runtime_error("Function '" + func_name + "' expected value of type " + std::string(typeid(T).name()) + ", got " + ptr->type());
388
+ }
389
+ }
390
+ void ensure_count(bool require0, bool require1, bool require2, bool require3) const {
391
+ static auto bool_to_int = [](bool b) { return b ? 1 : 0; };
392
+ size_t required = bool_to_int(require0) + bool_to_int(require1) + bool_to_int(require2) + bool_to_int(require3);
393
+ ensure_count(required);
394
+ }
395
+ template<typename T0> void ensure_vals(bool required0 = true) const {
396
+ ensure_count(required0, false, false, false);
397
+ if (required0 && args.size() > 0) ensure_val<T0>(args[0]);
398
+ }
399
+ template<typename T0, typename T1> void ensure_vals(bool required0 = true, bool required1 = true) const {
400
+ ensure_count(required0, required1, false, false);
401
+ if (required0 && args.size() > 0) ensure_val<T0>(args[0]);
402
+ if (required1 && args.size() > 1) ensure_val<T1>(args[1]);
403
+ }
404
+ template<typename T0, typename T1, typename T2> void ensure_vals(bool required0 = true, bool required1 = true, bool required2 = true) const {
405
+ ensure_count(required0, required1, required2, false);
406
+ if (required0 && args.size() > 0) ensure_val<T0>(args[0]);
407
+ if (required1 && args.size() > 1) ensure_val<T1>(args[1]);
408
+ if (required2 && args.size() > 2) ensure_val<T2>(args[2]);
409
+ }
410
+ template<typename T0, typename T1, typename T2, typename T3> void ensure_vals(bool required0 = true, bool required1 = true, bool required2 = true, bool required3 = true) const {
411
+ ensure_count(required0, required1, required2, required3);
412
+ if (required0 && args.size() > 0) ensure_val<T0>(args[0]);
413
+ if (required1 && args.size() > 1) ensure_val<T1>(args[1]);
414
+ if (required2 && args.size() > 2) ensure_val<T2>(args[2]);
415
+ if (required3 && args.size() > 3) ensure_val<T3>(args[3]);
416
+ }
417
+ private:
418
+ std::vector<value> args;
419
+ };
420
+
421
+ struct value_func_t : public value_t {
422
+ std::string name;
423
+ value arg0; // bound "this" argument, if any
424
+ value_func_t(const std::string & name, const func_handler & func) : name(name) {
425
+ val_func = func;
426
+ }
427
+ value_func_t(const std::string & name, const func_handler & func, const value & arg_this) : name(name), arg0(arg_this) {
428
+ val_func = func;
429
+ }
430
+ virtual value invoke(const func_args & args) const override {
431
+ func_args new_args(args); // copy
432
+ new_args.func_name = name;
433
+ if (arg0) {
434
+ new_args.push_front(arg0);
435
+ }
436
+ return val_func(new_args);
437
+ }
438
+ virtual std::string type() const override { return "Function"; }
439
+ virtual std::string as_repr() const override { return type(); }
440
+ };
441
+ using value_func = std::shared_ptr<value_func_t>;
442
+
443
+ // special value for kwarg
444
+ struct value_kwarg_t : public value_t {
445
+ std::string key;
446
+ value val;
447
+ value_kwarg_t(const std::string & k, const value & v) : key(k), val(v) {}
448
+ virtual std::string type() const override { return "KwArg"; }
449
+ virtual std::string as_repr() const override { return type(); }
450
+ };
451
+ using value_kwarg = std::shared_ptr<value_kwarg_t>;
452
+
453
+
454
+ // utils
455
+
456
+ const func_builtins & global_builtins();
457
+ std::string value_to_json(const value & val, int indent = -1, const std::string_view item_sep = ", ", const std::string_view key_sep = ": ");
458
+
// Exception type whose what() message is the given text prefixed with
// "NotImplemented: ".
struct not_implemented_exception : public std::runtime_error {
    not_implemented_exception(const std::string & msg)
        : std::runtime_error(std::string("NotImplemented: ") + msg) {}
};
462
+
463
+
464
+ } // namespace jinja
@@ -167,11 +167,11 @@ std::string common_params_sampling::print() const {
167
167
  "\trepeat_last_n = %d, repeat_penalty = %.3f, frequency_penalty = %.3f, presence_penalty = %.3f\n"
168
168
  "\tdry_multiplier = %.3f, dry_base = %.3f, dry_allowed_length = %d, dry_penalty_last_n = %d\n"
169
169
  "\ttop_k = %d, top_p = %.3f, min_p = %.3f, xtc_probability = %.3f, xtc_threshold = %.3f, typical_p = %.3f, top_n_sigma = %.3f, temp = %.3f\n"
170
- "\tmirostat = %d, mirostat_lr = %.3f, mirostat_ent = %.3f",
170
+ "\tmirostat = %d, mirostat_lr = %.3f, mirostat_ent = %.3f, adaptive_target = %.3f, adaptive_decay = %.3f",
171
171
  penalty_last_n, penalty_repeat, penalty_freq, penalty_present,
172
172
  dry_multiplier, dry_base, dry_allowed_length, dry_penalty_last_n,
173
173
  top_k, top_p, min_p, xtc_probability, xtc_threshold, typ_p, top_n_sigma, temp,
174
- mirostat, mirostat_eta, mirostat_tau);
174
+ mirostat, mirostat_eta, mirostat_tau, adaptive_target, adaptive_decay);
175
175
 
176
176
  return std::string(result);
177
177
  }
@@ -255,6 +255,9 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, st
255
255
  }
256
256
 
257
257
  if (params.mirostat == 0) {
258
+
259
+ bool use_adaptive_p = false; // see below
260
+
258
261
  for (const auto & cnstr : params.samplers) {
259
262
  switch (cnstr) {
260
263
  case COMMON_SAMPLER_TYPE_DRY:
@@ -264,43 +267,54 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, st
264
267
  for (const auto & str : params.dry_sequence_breakers) {
265
268
  c_breakers.push_back(str.c_str());
266
269
  }
267
-
268
- samplers.push_back(llama_sampler_init_dry (vocab, llama_model_n_ctx_train(model), params.dry_multiplier, params.dry_base, params.dry_allowed_length, params.dry_penalty_last_n, c_breakers.data(), c_breakers.size()));
270
+ samplers.push_back(llama_sampler_init_dry(vocab, llama_model_n_ctx_train(model), params.dry_multiplier, params.dry_base, params.dry_allowed_length, params.dry_penalty_last_n, c_breakers.data(), c_breakers.size()));
269
271
  }
270
272
  break;
271
273
  case COMMON_SAMPLER_TYPE_TOP_K:
272
- samplers.push_back(llama_sampler_init_top_k (params.top_k));
274
+ samplers.push_back(llama_sampler_init_top_k(params.top_k));
273
275
  break;
274
276
  case COMMON_SAMPLER_TYPE_TOP_P:
275
- samplers.push_back(llama_sampler_init_top_p (params.top_p, params.min_keep));
277
+ samplers.push_back(llama_sampler_init_top_p(params.top_p, params.min_keep));
276
278
  break;
277
279
  case COMMON_SAMPLER_TYPE_TOP_N_SIGMA:
278
280
  samplers.push_back(llama_sampler_init_top_n_sigma(params.top_n_sigma));
279
281
  break;
280
282
  case COMMON_SAMPLER_TYPE_MIN_P:
281
- samplers.push_back(llama_sampler_init_min_p (params.min_p, params.min_keep));
283
+ samplers.push_back(llama_sampler_init_min_p(params.min_p, params.min_keep));
282
284
  break;
283
285
  case COMMON_SAMPLER_TYPE_XTC:
284
- samplers.push_back(llama_sampler_init_xtc (params.xtc_probability, params.xtc_threshold, params.min_keep, params.seed));
286
+ samplers.push_back(llama_sampler_init_xtc(params.xtc_probability, params.xtc_threshold, params.min_keep, params.seed));
285
287
  break;
286
288
  case COMMON_SAMPLER_TYPE_TYPICAL_P:
287
- samplers.push_back(llama_sampler_init_typical (params.typ_p, params.min_keep));
289
+ samplers.push_back(llama_sampler_init_typical(params.typ_p, params.min_keep));
288
290
  break;
289
291
  case COMMON_SAMPLER_TYPE_TEMPERATURE:
290
- samplers.push_back(llama_sampler_init_temp_ext (params.temp, params.dynatemp_range, params.dynatemp_exponent));
292
+ samplers.push_back(llama_sampler_init_temp_ext(params.temp, params.dynatemp_range, params.dynatemp_exponent));
291
293
  break;
292
294
  case COMMON_SAMPLER_TYPE_INFILL:
293
- samplers.push_back(llama_sampler_init_infill (vocab));
295
+ samplers.push_back(llama_sampler_init_infill(vocab));
294
296
  break;
295
297
  case COMMON_SAMPLER_TYPE_PENALTIES:
296
- samplers.push_back(llama_sampler_init_penalties (params.penalty_last_n, params.penalty_repeat, params.penalty_freq, params.penalty_present));
298
+ samplers.push_back(llama_sampler_init_penalties(params.penalty_last_n, params.penalty_repeat, params.penalty_freq, params.penalty_present));
299
+ break;
300
+ case COMMON_SAMPLER_TYPE_ADAPTIVE_P:
301
+ // the `adaptive-p` sampler is like `dist` and `mirostat` in that it selects
302
+ // a single token, so we will add `dist` at the end of the chain by default,
303
+ // unless the user specifically included `adaptive-p`. we set this flag here
304
+ // so we know to add the sampler at the very end.
305
+ use_adaptive_p = true;
297
306
  break;
298
307
  default:
299
308
  GGML_ASSERT(false && "unknown sampler type");
300
309
  }
301
310
  }
302
-
303
- samplers.push_back(llama_sampler_init_dist(params.seed));
311
+ if (use_adaptive_p) {
312
+ // only if user explicitly included adaptive-p sampler
313
+ samplers.push_back(llama_sampler_init_adaptive_p(params.adaptive_target, params.adaptive_decay, params.seed));
314
+ } else {
315
+ // default: sample from distribution
316
+ samplers.push_back(llama_sampler_init_dist(params.seed));
317
+ }
304
318
  } else if (params.mirostat == 1) {
305
319
  samplers.push_back(llama_sampler_init_temp(params.temp));
306
320
  samplers.push_back(llama_sampler_init_mirostat(llama_vocab_n_tokens(vocab), params.seed, params.mirostat_tau, params.mirostat_eta, 100));
@@ -334,15 +348,21 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, st
334
348
  }
335
349
 
336
350
  void common_sampler_free(struct common_sampler * gsmpl) {
337
- if (gsmpl) {
338
- llama_sampler_free(gsmpl->grmr);
339
- llama_sampler_free(gsmpl->chain);
340
-
341
- delete gsmpl;
351
+ if (!gsmpl) {
352
+ return;
342
353
  }
354
+
355
+ llama_sampler_free(gsmpl->grmr);
356
+ llama_sampler_free(gsmpl->chain);
357
+
358
+ delete gsmpl;
343
359
  }
344
360
 
345
361
  void common_sampler_accept(struct common_sampler * gsmpl, llama_token token, bool accept_grammar) {
362
+ if (!gsmpl) {
363
+ return;
364
+ }
365
+
346
366
  const auto tm = gsmpl->tm();
347
367
 
348
368
  if (gsmpl->grmr && accept_grammar) {
@@ -355,6 +375,10 @@ void common_sampler_accept(struct common_sampler * gsmpl, llama_token token, boo
355
375
  }
356
376
 
357
377
  void common_sampler_reset(struct common_sampler * gsmpl) {
378
+ if (!gsmpl) {
379
+ return;
380
+ }
381
+
358
382
  gsmpl->reset();
359
383
  }
360
384
 
@@ -415,6 +439,10 @@ void common_perf_print(const struct llama_context * ctx, const struct common_sam
415
439
  }
416
440
 
417
441
  struct llama_sampler * common_sampler_get(const struct common_sampler * gsmpl) {
442
+ if (!gsmpl) {
443
+ return nullptr;
444
+ }
445
+
418
446
  return gsmpl->chain;
419
447
  }
420
448
 
@@ -611,6 +639,7 @@ char common_sampler_type_to_chr(enum common_sampler_type cnstr) {
611
639
  case COMMON_SAMPLER_TYPE_XTC: return 'x';
612
640
  case COMMON_SAMPLER_TYPE_INFILL: return 'i';
613
641
  case COMMON_SAMPLER_TYPE_PENALTIES: return 'e';
642
+ case COMMON_SAMPLER_TYPE_ADAPTIVE_P: return 'a';
614
643
  default : return '?';
615
644
  }
616
645
  }
@@ -627,6 +656,7 @@ std::string common_sampler_type_to_str(enum common_sampler_type cnstr) {
627
656
  case COMMON_SAMPLER_TYPE_XTC: return "xtc";
628
657
  case COMMON_SAMPLER_TYPE_INFILL: return "infill";
629
658
  case COMMON_SAMPLER_TYPE_PENALTIES: return "penalties";
659
+ case COMMON_SAMPLER_TYPE_ADAPTIVE_P: return "adaptive_p";
630
660
  default : return "";
631
661
  }
632
662
  }
@@ -643,6 +673,7 @@ std::vector<common_sampler_type> common_sampler_types_from_names(const std::vect
643
673
  { "xtc", COMMON_SAMPLER_TYPE_XTC },
644
674
  { "infill", COMMON_SAMPLER_TYPE_INFILL },
645
675
  { "penalties", COMMON_SAMPLER_TYPE_PENALTIES },
676
+ { "adaptive_p", COMMON_SAMPLER_TYPE_ADAPTIVE_P },
646
677
  };
647
678
 
648
679
  // since samplers names are written multiple ways
@@ -658,6 +689,7 @@ std::vector<common_sampler_type> common_sampler_types_from_names(const std::vect
658
689
  { "typ", COMMON_SAMPLER_TYPE_TYPICAL_P },
659
690
  { "min-p", COMMON_SAMPLER_TYPE_MIN_P },
660
691
  { "temp", COMMON_SAMPLER_TYPE_TEMPERATURE },
692
+ { "adaptive-p", COMMON_SAMPLER_TYPE_ADAPTIVE_P },
661
693
  };
662
694
 
663
695
  std::vector<common_sampler_type> samplers;
@@ -694,6 +726,7 @@ std::vector<common_sampler_type> common_sampler_types_from_chars(const std::stri
694
726
  { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_XTC), COMMON_SAMPLER_TYPE_XTC },
695
727
  { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_INFILL), COMMON_SAMPLER_TYPE_INFILL },
696
728
  { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_PENALTIES), COMMON_SAMPLER_TYPE_PENALTIES },
729
+ { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_ADAPTIVE_P), COMMON_SAMPLER_TYPE_ADAPTIVE_P },
697
730
  };
698
731
 
699
732
  std::vector<common_sampler_type> samplers;