@fugood/llama.node 1.4.15 → 1.6.0-rc.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/binding.ts +1 -5
- package/lib/index.js +2 -2
- package/lib/index.ts +2 -2
- package/package.json +15 -15
- package/scripts/llama.cpp.patch +76 -61
- package/src/LlamaContext.cpp +20 -32
- package/src/llama.cpp/common/CMakeLists.txt +12 -0
- package/src/llama.cpp/common/arg.cpp +20 -0
- package/src/llama.cpp/common/chat-parser.cpp +3 -3
- package/src/llama.cpp/common/chat-parser.h +4 -4
- package/src/llama.cpp/common/chat.cpp +289 -34
- package/src/llama.cpp/common/chat.h +32 -20
- package/src/llama.cpp/common/common.cpp +0 -1
- package/src/llama.cpp/common/common.h +31 -25
- package/src/llama.cpp/common/download.cpp +19 -14
- package/src/llama.cpp/common/jinja/caps.cpp +237 -0
- package/src/llama.cpp/common/jinja/caps.h +24 -0
- package/src/llama.cpp/common/jinja/lexer.cpp +341 -0
- package/src/llama.cpp/common/jinja/lexer.h +157 -0
- package/src/llama.cpp/common/jinja/parser.cpp +591 -0
- package/src/llama.cpp/common/jinja/parser.h +21 -0
- package/src/llama.cpp/common/jinja/runtime.cpp +865 -0
- package/src/llama.cpp/common/jinja/runtime.h +628 -0
- package/src/llama.cpp/common/jinja/string.cpp +207 -0
- package/src/llama.cpp/common/jinja/string.h +58 -0
- package/src/llama.cpp/common/jinja/utils.h +49 -0
- package/src/llama.cpp/common/jinja/value.cpp +1221 -0
- package/src/llama.cpp/common/jinja/value.h +464 -0
- package/src/llama.cpp/common/json-partial.h +1 -0
- package/src/llama.cpp/common/sampling.cpp +52 -19
- package/src/llama.cpp/ggml/include/ggml.h +39 -7
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +4 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +63 -37
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +31 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +18 -0
- package/src/llama.cpp/include/llama-cpp.h +3 -1
- package/src/llama.cpp/include/llama.h +29 -2
- package/src/llama.cpp/src/llama-adapter.cpp +7 -13
- package/src/llama.cpp/src/llama-adapter.h +1 -3
- package/src/llama.cpp/src/llama-context.cpp +232 -144
- package/src/llama.cpp/src/llama-context.h +10 -0
- package/src/llama.cpp/src/llama-cparams.h +2 -0
- package/src/llama.cpp/src/llama-hparams.cpp +0 -36
- package/src/llama.cpp/src/llama-hparams.h +38 -1
- package/src/llama.cpp/src/llama-kv-cache.cpp +201 -59
- package/src/llama.cpp/src/llama-kv-cache.h +0 -2
- package/src/llama.cpp/src/llama-mmap.cpp +5 -1
- package/src/llama.cpp/src/llama-model-loader.cpp +21 -7
- package/src/llama.cpp/src/llama-model.cpp +5 -1
- package/src/llama.cpp/src/llama-model.h +3 -2
- package/src/llama.cpp/src/llama-sampling.cpp +170 -13
|
@@ -0,0 +1,464 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include "string.h"
|
|
4
|
+
|
|
5
|
+
#include <algorithm>
|
|
6
|
+
#include <cstdint>
|
|
7
|
+
#include <functional>
|
|
8
|
+
#include <map>
|
|
9
|
+
#include <memory>
|
|
10
|
+
#include <set>
|
|
11
|
+
#include <sstream>
|
|
12
|
+
#include <string>
|
|
13
|
+
#include <vector>
|
|
14
|
+
|
|
15
|
+
namespace jinja {
|
|
16
|
+
|
|
17
|
+
struct value_t;
|
|
18
|
+
using value = std::shared_ptr<value_t>;
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
// Helper to check the type of a value
|
|
22
|
+
// Type trait used by the value helpers: yields X for std::shared_ptr<X>
// and leaves every other type unchanged. This lets the helpers accept
// either a struct type or its shared_ptr alias as template argument.
template<typename Plain>
struct extract_pointee {
    typedef Plain type;
};
// shared_ptr case: unwrap to the managed type
template<typename Managed>
struct extract_pointee<std::shared_ptr<Managed>> {
    typedef Managed type;
};
|
|
30
|
+
template<typename T>
|
|
31
|
+
bool is_val(const value & ptr) {
|
|
32
|
+
using PointeeType = typename extract_pointee<T>::type;
|
|
33
|
+
return dynamic_cast<const PointeeType*>(ptr.get()) != nullptr;
|
|
34
|
+
}
|
|
35
|
+
template<typename T>
|
|
36
|
+
bool is_val(const value_t * ptr) {
|
|
37
|
+
using PointeeType = typename extract_pointee<T>::type;
|
|
38
|
+
return dynamic_cast<const PointeeType*>(ptr) != nullptr;
|
|
39
|
+
}
|
|
40
|
+
template<typename T, typename... Args>
|
|
41
|
+
std::shared_ptr<typename extract_pointee<T>::type> mk_val(Args&&... args) {
|
|
42
|
+
using PointeeType = typename extract_pointee<T>::type;
|
|
43
|
+
return std::make_shared<PointeeType>(std::forward<Args>(args)...);
|
|
44
|
+
}
|
|
45
|
+
template<typename T>
|
|
46
|
+
const typename extract_pointee<T>::type * cast_val(const value & ptr) {
|
|
47
|
+
using PointeeType = typename extract_pointee<T>::type;
|
|
48
|
+
return dynamic_cast<const PointeeType*>(ptr.get());
|
|
49
|
+
}
|
|
50
|
+
template<typename T>
|
|
51
|
+
typename extract_pointee<T>::type * cast_val(value & ptr) {
|
|
52
|
+
using PointeeType = typename extract_pointee<T>::type;
|
|
53
|
+
return dynamic_cast<PointeeType*>(ptr.get());
|
|
54
|
+
}
|
|
55
|
+
// End Helper
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
struct context; // forward declaration
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
// for converting from JSON to jinja values
|
|
62
|
+
// example input JSON:
|
|
63
|
+
// {
|
|
64
|
+
// "messages": [
|
|
65
|
+
// {"role": "user", "content": "Hello!"},
|
|
66
|
+
// {"role": "assistant", "content": "Hi there!"}
|
|
67
|
+
// ],
|
|
68
|
+
// "bos_token": "<s>",
|
|
69
|
+
// "eos_token": "</s>",
|
|
70
|
+
// }
|
|
71
|
+
//
|
|
72
|
+
// to mark strings as user input, wrap them in a special object:
|
|
73
|
+
// {
|
|
74
|
+
// "messages": [
|
|
75
|
+
// {
|
|
76
|
+
// "role": "user",
|
|
77
|
+
// "content": {"__input__": "Hello!"} // this string is user input
|
|
78
|
+
// },
|
|
79
|
+
// ...
|
|
80
|
+
// ],
|
|
81
|
+
// }
|
|
82
|
+
//
|
|
83
|
+
// marking input can be useful for tracking data provenance
|
|
84
|
+
// and preventing template injection attacks
|
|
85
|
+
//
|
|
86
|
+
// Note: T_JSON can be nlohmann::ordered_json
|
|
87
|
+
template<typename T_JSON>
|
|
88
|
+
void global_from_json(context & ctx, const T_JSON & json_obj, bool mark_input);
|
|
89
|
+
|
|
90
|
+
//
|
|
91
|
+
// base value type
|
|
92
|
+
//
|
|
93
|
+
|
|
94
|
+
struct func_args; // function argument values
|
|
95
|
+
|
|
96
|
+
using func_handler = std::function<value(const func_args &)>;
|
|
97
|
+
using func_builtins = std::map<std::string, func_handler>;
|
|
98
|
+
|
|
99
|
+
enum value_compare_op { eq, ge, gt, lt, ne };
|
|
100
|
+
bool value_compare(const value & a, const value & b, value_compare_op op);
|
|
101
|
+
|
|
102
|
+
struct value_t {
|
|
103
|
+
int64_t val_int;
|
|
104
|
+
double val_flt;
|
|
105
|
+
string val_str;
|
|
106
|
+
bool val_bool;
|
|
107
|
+
|
|
108
|
+
std::vector<value> val_arr;
|
|
109
|
+
|
|
110
|
+
struct map {
|
|
111
|
+
// once set to true, all keys must be numeric
|
|
112
|
+
// caveat: we only allow either all numeric keys or all non-numeric keys
|
|
113
|
+
// for now, this only applied to for_statement in case of iterating over object keys/items
|
|
114
|
+
bool is_key_numeric = false;
|
|
115
|
+
std::map<std::string, value> unordered;
|
|
116
|
+
std::vector<std::pair<std::string, value>> ordered;
|
|
117
|
+
void insert(const std::string & key, const value & val) {
|
|
118
|
+
if (unordered.find(key) != unordered.end()) {
|
|
119
|
+
// if key exists, remove from ordered list
|
|
120
|
+
ordered.erase(std::remove_if(ordered.begin(), ordered.end(),
|
|
121
|
+
[&](const std::pair<std::string, value> & p) { return p.first == key; }),
|
|
122
|
+
ordered.end());
|
|
123
|
+
}
|
|
124
|
+
unordered[key] = val;
|
|
125
|
+
ordered.push_back({key, val});
|
|
126
|
+
}
|
|
127
|
+
} val_obj;
|
|
128
|
+
|
|
129
|
+
func_handler val_func;
|
|
130
|
+
|
|
131
|
+
// only used if ctx.is_get_stats = true
|
|
132
|
+
struct stats_t {
|
|
133
|
+
bool used = false;
|
|
134
|
+
// ops can be builtin calls or operators: "array_access", "object_access"
|
|
135
|
+
std::set<std::string> ops;
|
|
136
|
+
} stats;
|
|
137
|
+
|
|
138
|
+
value_t() = default;
|
|
139
|
+
value_t(const value_t &) = default;
|
|
140
|
+
virtual ~value_t() = default;
|
|
141
|
+
|
|
142
|
+
virtual std::string type() const { return ""; }
|
|
143
|
+
|
|
144
|
+
virtual int64_t as_int() const { throw std::runtime_error(type() + " is not an int value"); }
|
|
145
|
+
virtual double as_float() const { throw std::runtime_error(type() + " is not a float value"); }
|
|
146
|
+
virtual string as_string() const { throw std::runtime_error(type() + " is not a string value"); }
|
|
147
|
+
virtual bool as_bool() const { throw std::runtime_error(type() + " is not a bool value"); }
|
|
148
|
+
virtual const std::vector<value> & as_array() const { throw std::runtime_error(type() + " is not an array value"); }
|
|
149
|
+
virtual const std::vector<std::pair<std::string, value>> & as_ordered_object() const { throw std::runtime_error(type() + " is not an object value"); }
|
|
150
|
+
virtual value invoke(const func_args &) const { throw std::runtime_error(type() + " is not a function value"); }
|
|
151
|
+
virtual bool is_none() const { return false; }
|
|
152
|
+
virtual bool is_undefined() const { return false; }
|
|
153
|
+
virtual const func_builtins & get_builtins() const {
|
|
154
|
+
throw std::runtime_error("No builtins available for type " + type());
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
virtual bool has_key(const std::string & key) {
|
|
158
|
+
return val_obj.unordered.find(key) != val_obj.unordered.end();
|
|
159
|
+
}
|
|
160
|
+
virtual value & at(const std::string & key, value & default_val) {
|
|
161
|
+
auto it = val_obj.unordered.find(key);
|
|
162
|
+
if (it == val_obj.unordered.end()) {
|
|
163
|
+
return default_val;
|
|
164
|
+
}
|
|
165
|
+
return val_obj.unordered.at(key);
|
|
166
|
+
}
|
|
167
|
+
virtual value & at(const std::string & key) {
|
|
168
|
+
auto it = val_obj.unordered.find(key);
|
|
169
|
+
if (it == val_obj.unordered.end()) {
|
|
170
|
+
throw std::runtime_error("Key '" + key + "' not found in value of type " + type());
|
|
171
|
+
}
|
|
172
|
+
return val_obj.unordered.at(key);
|
|
173
|
+
}
|
|
174
|
+
virtual value & at(int64_t index, value & default_val) {
|
|
175
|
+
if (index < 0) {
|
|
176
|
+
index += val_arr.size();
|
|
177
|
+
}
|
|
178
|
+
if (index < 0 || static_cast<size_t>(index) >= val_arr.size()) {
|
|
179
|
+
return default_val;
|
|
180
|
+
}
|
|
181
|
+
return val_arr[index];
|
|
182
|
+
}
|
|
183
|
+
virtual value & at(int64_t index) {
|
|
184
|
+
if (index < 0) {
|
|
185
|
+
index += val_arr.size();
|
|
186
|
+
}
|
|
187
|
+
if (index < 0 || static_cast<size_t>(index) >= val_arr.size()) {
|
|
188
|
+
throw std::runtime_error("Index " + std::to_string(index) + " out of bounds for array of size " + std::to_string(val_arr.size()));
|
|
189
|
+
}
|
|
190
|
+
return val_arr[index];
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
virtual std::string as_repr() const { return as_string().str(); }
|
|
194
|
+
};
|
|
195
|
+
|
|
196
|
+
//
|
|
197
|
+
// primitive value types
|
|
198
|
+
//
|
|
199
|
+
|
|
200
|
+
struct value_int_t : public value_t {
|
|
201
|
+
value_int_t(int64_t v) { val_int = v; }
|
|
202
|
+
virtual std::string type() const override { return "Integer"; }
|
|
203
|
+
virtual int64_t as_int() const override { return val_int; }
|
|
204
|
+
virtual double as_float() const override { return static_cast<double>(val_int); }
|
|
205
|
+
virtual string as_string() const override { return std::to_string(val_int); }
|
|
206
|
+
virtual bool as_bool() const override {
|
|
207
|
+
return val_int != 0;
|
|
208
|
+
}
|
|
209
|
+
virtual const func_builtins & get_builtins() const override;
|
|
210
|
+
};
|
|
211
|
+
using value_int = std::shared_ptr<value_int_t>;
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
struct value_float_t : public value_t {
|
|
215
|
+
value_float_t(double v) { val_flt = v; }
|
|
216
|
+
virtual std::string type() const override { return "Float"; }
|
|
217
|
+
virtual double as_float() const override { return val_flt; }
|
|
218
|
+
virtual int64_t as_int() const override { return static_cast<int64_t>(val_flt); }
|
|
219
|
+
virtual string as_string() const override {
|
|
220
|
+
std::string out = std::to_string(val_flt);
|
|
221
|
+
out.erase(out.find_last_not_of('0') + 1, std::string::npos); // remove trailing zeros
|
|
222
|
+
if (out.back() == '.') out.push_back('0'); // leave one zero if no decimals
|
|
223
|
+
return out;
|
|
224
|
+
}
|
|
225
|
+
virtual bool as_bool() const override {
|
|
226
|
+
return val_flt != 0.0;
|
|
227
|
+
}
|
|
228
|
+
virtual const func_builtins & get_builtins() const override;
|
|
229
|
+
};
|
|
230
|
+
using value_float = std::shared_ptr<value_float_t>;
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
struct value_string_t : public value_t {
|
|
234
|
+
value_string_t() { val_str = string(); }
|
|
235
|
+
value_string_t(const std::string & v) { val_str = string(v); }
|
|
236
|
+
value_string_t(const string & v) { val_str = v; }
|
|
237
|
+
virtual std::string type() const override { return "String"; }
|
|
238
|
+
virtual string as_string() const override { return val_str; }
|
|
239
|
+
virtual std::string as_repr() const override {
|
|
240
|
+
std::ostringstream ss;
|
|
241
|
+
for (const auto & part : val_str.parts) {
|
|
242
|
+
ss << (part.is_input ? "INPUT: " : "TMPL: ") << part.val << "\n";
|
|
243
|
+
}
|
|
244
|
+
return ss.str();
|
|
245
|
+
}
|
|
246
|
+
virtual bool as_bool() const override {
|
|
247
|
+
return val_str.length() > 0;
|
|
248
|
+
}
|
|
249
|
+
virtual const func_builtins & get_builtins() const override;
|
|
250
|
+
void mark_input() {
|
|
251
|
+
val_str.mark_input();
|
|
252
|
+
}
|
|
253
|
+
};
|
|
254
|
+
using value_string = std::shared_ptr<value_string_t>;
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
struct value_bool_t : public value_t {
|
|
258
|
+
value_bool_t(bool v) { val_bool = v; }
|
|
259
|
+
virtual std::string type() const override { return "Boolean"; }
|
|
260
|
+
virtual bool as_bool() const override { return val_bool; }
|
|
261
|
+
virtual string as_string() const override { return std::string(val_bool ? "True" : "False"); }
|
|
262
|
+
virtual const func_builtins & get_builtins() const override;
|
|
263
|
+
};
|
|
264
|
+
using value_bool = std::shared_ptr<value_bool_t>;
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
struct value_array_t : public value_t {
|
|
268
|
+
value_array_t() = default;
|
|
269
|
+
value_array_t(value & v) {
|
|
270
|
+
val_arr = v->val_arr;
|
|
271
|
+
}
|
|
272
|
+
value_array_t(const std::vector<value> & arr) {
|
|
273
|
+
val_arr = arr;
|
|
274
|
+
}
|
|
275
|
+
void reverse() { std::reverse(val_arr.begin(), val_arr.end()); }
|
|
276
|
+
void push_back(const value & val) { val_arr.push_back(val); }
|
|
277
|
+
void push_back(value && val) { val_arr.push_back(std::move(val)); }
|
|
278
|
+
value pop_at(int64_t index) {
|
|
279
|
+
if (index < 0) {
|
|
280
|
+
index = static_cast<int64_t>(val_arr.size()) + index;
|
|
281
|
+
}
|
|
282
|
+
if (index < 0 || index >= static_cast<int64_t>(val_arr.size())) {
|
|
283
|
+
throw std::runtime_error("Index " + std::to_string(index) + " out of bounds for array of size " + std::to_string(val_arr.size()));
|
|
284
|
+
}
|
|
285
|
+
value val = val_arr.at(static_cast<size_t>(index));
|
|
286
|
+
val_arr.erase(val_arr.begin() + index);
|
|
287
|
+
return val;
|
|
288
|
+
}
|
|
289
|
+
virtual std::string type() const override { return "Array"; }
|
|
290
|
+
virtual const std::vector<value> & as_array() const override { return val_arr; }
|
|
291
|
+
virtual string as_string() const override {
|
|
292
|
+
std::ostringstream ss;
|
|
293
|
+
ss << "[";
|
|
294
|
+
for (size_t i = 0; i < val_arr.size(); i++) {
|
|
295
|
+
if (i > 0) ss << ", ";
|
|
296
|
+
ss << val_arr.at(i)->as_repr();
|
|
297
|
+
}
|
|
298
|
+
ss << "]";
|
|
299
|
+
return ss.str();
|
|
300
|
+
}
|
|
301
|
+
virtual bool as_bool() const override {
|
|
302
|
+
return !val_arr.empty();
|
|
303
|
+
}
|
|
304
|
+
virtual const func_builtins & get_builtins() const override;
|
|
305
|
+
};
|
|
306
|
+
using value_array = std::shared_ptr<value_array_t>;
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
struct value_object_t : public value_t {
|
|
310
|
+
bool has_builtins = true; // context and loop objects do not have builtins
|
|
311
|
+
value_object_t() = default;
|
|
312
|
+
value_object_t(value & v) {
|
|
313
|
+
val_obj = v->val_obj;
|
|
314
|
+
}
|
|
315
|
+
value_object_t(const std::map<std::string, value> & obj) {
|
|
316
|
+
for (const auto & pair : obj) {
|
|
317
|
+
val_obj.insert(pair.first, pair.second);
|
|
318
|
+
}
|
|
319
|
+
}
|
|
320
|
+
value_object_t(const std::vector<std::pair<std::string, value>> & obj) {
|
|
321
|
+
for (const auto & pair : obj) {
|
|
322
|
+
val_obj.insert(pair.first, pair.second);
|
|
323
|
+
}
|
|
324
|
+
}
|
|
325
|
+
void insert(const std::string & key, const value & val) {
|
|
326
|
+
val_obj.insert(key, val);
|
|
327
|
+
}
|
|
328
|
+
virtual std::string type() const override { return "Object"; }
|
|
329
|
+
virtual const std::vector<std::pair<std::string, value>> & as_ordered_object() const override { return val_obj.ordered; }
|
|
330
|
+
virtual bool as_bool() const override {
|
|
331
|
+
return !val_obj.unordered.empty();
|
|
332
|
+
}
|
|
333
|
+
virtual const func_builtins & get_builtins() const override;
|
|
334
|
+
};
|
|
335
|
+
using value_object = std::shared_ptr<value_object_t>;
|
|
336
|
+
|
|
337
|
+
//
|
|
338
|
+
// null and undefined types
|
|
339
|
+
//
|
|
340
|
+
|
|
341
|
+
struct value_none_t : public value_t {
|
|
342
|
+
virtual std::string type() const override { return "None"; }
|
|
343
|
+
virtual bool is_none() const override { return true; }
|
|
344
|
+
virtual bool as_bool() const override { return false; }
|
|
345
|
+
virtual std::string as_repr() const override { return type(); }
|
|
346
|
+
virtual const func_builtins & get_builtins() const override;
|
|
347
|
+
};
|
|
348
|
+
using value_none = std::shared_ptr<value_none_t>;
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
struct value_undefined_t : public value_t {
|
|
352
|
+
std::string hint; // for debugging, to indicate where undefined came from
|
|
353
|
+
value_undefined_t(const std::string & h = "") : hint(h) {}
|
|
354
|
+
virtual std::string type() const override { return hint.empty() ? "Undefined" : "Undefined (hint: '" + hint + "')"; }
|
|
355
|
+
virtual bool is_undefined() const override { return true; }
|
|
356
|
+
virtual bool as_bool() const override { return false; }
|
|
357
|
+
virtual std::string as_repr() const override { return type(); }
|
|
358
|
+
virtual const func_builtins & get_builtins() const override;
|
|
359
|
+
};
|
|
360
|
+
using value_undefined = std::shared_ptr<value_undefined_t>;
|
|
361
|
+
|
|
362
|
+
//
|
|
363
|
+
// function type
|
|
364
|
+
//
|
|
365
|
+
|
|
366
|
+
struct func_args {
|
|
367
|
+
public:
|
|
368
|
+
std::string func_name; // for error messages
|
|
369
|
+
context & ctx;
|
|
370
|
+
func_args(context & ctx) : ctx(ctx) {}
|
|
371
|
+
value get_kwarg(const std::string & key, value default_val) const;
|
|
372
|
+
value get_kwarg_or_pos(const std::string & key, size_t pos) const;
|
|
373
|
+
value get_pos(size_t pos) const;
|
|
374
|
+
value get_pos(size_t pos, value default_val) const;
|
|
375
|
+
const std::vector<value> & get_args() const;
|
|
376
|
+
size_t count() const { return args.size(); }
|
|
377
|
+
void push_back(const value & val);
|
|
378
|
+
void push_front(const value & val);
|
|
379
|
+
void ensure_count(size_t min, size_t max = 999) const {
|
|
380
|
+
size_t n = args.size();
|
|
381
|
+
if (n < min || n > max) {
|
|
382
|
+
throw std::runtime_error("Function '" + func_name + "' expected between " + std::to_string(min) + " and " + std::to_string(max) + " arguments, got " + std::to_string(n));
|
|
383
|
+
}
|
|
384
|
+
}
|
|
385
|
+
template<typename T> void ensure_val(const value & ptr) const {
|
|
386
|
+
if (!is_val<T>(ptr)) {
|
|
387
|
+
throw std::runtime_error("Function '" + func_name + "' expected value of type " + std::string(typeid(T).name()) + ", got " + ptr->type());
|
|
388
|
+
}
|
|
389
|
+
}
|
|
390
|
+
void ensure_count(bool require0, bool require1, bool require2, bool require3) const {
|
|
391
|
+
static auto bool_to_int = [](bool b) { return b ? 1 : 0; };
|
|
392
|
+
size_t required = bool_to_int(require0) + bool_to_int(require1) + bool_to_int(require2) + bool_to_int(require3);
|
|
393
|
+
ensure_count(required);
|
|
394
|
+
}
|
|
395
|
+
template<typename T0> void ensure_vals(bool required0 = true) const {
|
|
396
|
+
ensure_count(required0, false, false, false);
|
|
397
|
+
if (required0 && args.size() > 0) ensure_val<T0>(args[0]);
|
|
398
|
+
}
|
|
399
|
+
template<typename T0, typename T1> void ensure_vals(bool required0 = true, bool required1 = true) const {
|
|
400
|
+
ensure_count(required0, required1, false, false);
|
|
401
|
+
if (required0 && args.size() > 0) ensure_val<T0>(args[0]);
|
|
402
|
+
if (required1 && args.size() > 1) ensure_val<T1>(args[1]);
|
|
403
|
+
}
|
|
404
|
+
template<typename T0, typename T1, typename T2> void ensure_vals(bool required0 = true, bool required1 = true, bool required2 = true) const {
|
|
405
|
+
ensure_count(required0, required1, required2, false);
|
|
406
|
+
if (required0 && args.size() > 0) ensure_val<T0>(args[0]);
|
|
407
|
+
if (required1 && args.size() > 1) ensure_val<T1>(args[1]);
|
|
408
|
+
if (required2 && args.size() > 2) ensure_val<T2>(args[2]);
|
|
409
|
+
}
|
|
410
|
+
template<typename T0, typename T1, typename T2, typename T3> void ensure_vals(bool required0 = true, bool required1 = true, bool required2 = true, bool required3 = true) const {
|
|
411
|
+
ensure_count(required0, required1, required2, required3);
|
|
412
|
+
if (required0 && args.size() > 0) ensure_val<T0>(args[0]);
|
|
413
|
+
if (required1 && args.size() > 1) ensure_val<T1>(args[1]);
|
|
414
|
+
if (required2 && args.size() > 2) ensure_val<T2>(args[2]);
|
|
415
|
+
if (required3 && args.size() > 3) ensure_val<T3>(args[3]);
|
|
416
|
+
}
|
|
417
|
+
private:
|
|
418
|
+
std::vector<value> args;
|
|
419
|
+
};
|
|
420
|
+
|
|
421
|
+
struct value_func_t : public value_t {
|
|
422
|
+
std::string name;
|
|
423
|
+
value arg0; // bound "this" argument, if any
|
|
424
|
+
value_func_t(const std::string & name, const func_handler & func) : name(name) {
|
|
425
|
+
val_func = func;
|
|
426
|
+
}
|
|
427
|
+
value_func_t(const std::string & name, const func_handler & func, const value & arg_this) : name(name), arg0(arg_this) {
|
|
428
|
+
val_func = func;
|
|
429
|
+
}
|
|
430
|
+
virtual value invoke(const func_args & args) const override {
|
|
431
|
+
func_args new_args(args); // copy
|
|
432
|
+
new_args.func_name = name;
|
|
433
|
+
if (arg0) {
|
|
434
|
+
new_args.push_front(arg0);
|
|
435
|
+
}
|
|
436
|
+
return val_func(new_args);
|
|
437
|
+
}
|
|
438
|
+
virtual std::string type() const override { return "Function"; }
|
|
439
|
+
virtual std::string as_repr() const override { return type(); }
|
|
440
|
+
};
|
|
441
|
+
using value_func = std::shared_ptr<value_func_t>;
|
|
442
|
+
|
|
443
|
+
// special value for kwarg
|
|
444
|
+
struct value_kwarg_t : public value_t {
|
|
445
|
+
std::string key;
|
|
446
|
+
value val;
|
|
447
|
+
value_kwarg_t(const std::string & k, const value & v) : key(k), val(v) {}
|
|
448
|
+
virtual std::string type() const override { return "KwArg"; }
|
|
449
|
+
virtual std::string as_repr() const override { return type(); }
|
|
450
|
+
};
|
|
451
|
+
using value_kwarg = std::shared_ptr<value_kwarg_t>;
|
|
452
|
+
|
|
453
|
+
|
|
454
|
+
// utils
|
|
455
|
+
|
|
456
|
+
const func_builtins & global_builtins();
|
|
457
|
+
std::string value_to_json(const value & val, int indent = -1, const std::string_view item_sep = ", ", const std::string_view key_sep = ": ");
|
|
458
|
+
|
|
459
|
+
// Runtime error whose message is `msg` prefixed with "NotImplemented: ".
struct not_implemented_exception : public std::runtime_error {
    not_implemented_exception(const std::string & msg)
        : std::runtime_error(std::string("NotImplemented: ").append(msg)) {}
};
|
|
462
|
+
|
|
463
|
+
|
|
464
|
+
} // namespace jinja
|
|
@@ -167,11 +167,11 @@ std::string common_params_sampling::print() const {
|
|
|
167
167
|
"\trepeat_last_n = %d, repeat_penalty = %.3f, frequency_penalty = %.3f, presence_penalty = %.3f\n"
|
|
168
168
|
"\tdry_multiplier = %.3f, dry_base = %.3f, dry_allowed_length = %d, dry_penalty_last_n = %d\n"
|
|
169
169
|
"\ttop_k = %d, top_p = %.3f, min_p = %.3f, xtc_probability = %.3f, xtc_threshold = %.3f, typical_p = %.3f, top_n_sigma = %.3f, temp = %.3f\n"
|
|
170
|
-
"\tmirostat = %d, mirostat_lr = %.3f, mirostat_ent = %.3f",
|
|
170
|
+
"\tmirostat = %d, mirostat_lr = %.3f, mirostat_ent = %.3f, adaptive_target = %.3f, adaptive_decay = %.3f",
|
|
171
171
|
penalty_last_n, penalty_repeat, penalty_freq, penalty_present,
|
|
172
172
|
dry_multiplier, dry_base, dry_allowed_length, dry_penalty_last_n,
|
|
173
173
|
top_k, top_p, min_p, xtc_probability, xtc_threshold, typ_p, top_n_sigma, temp,
|
|
174
|
-
mirostat, mirostat_eta, mirostat_tau);
|
|
174
|
+
mirostat, mirostat_eta, mirostat_tau, adaptive_target, adaptive_decay);
|
|
175
175
|
|
|
176
176
|
return std::string(result);
|
|
177
177
|
}
|
|
@@ -255,6 +255,9 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, st
|
|
|
255
255
|
}
|
|
256
256
|
|
|
257
257
|
if (params.mirostat == 0) {
|
|
258
|
+
|
|
259
|
+
bool use_adaptive_p = false; // see below
|
|
260
|
+
|
|
258
261
|
for (const auto & cnstr : params.samplers) {
|
|
259
262
|
switch (cnstr) {
|
|
260
263
|
case COMMON_SAMPLER_TYPE_DRY:
|
|
@@ -264,43 +267,54 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, st
|
|
|
264
267
|
for (const auto & str : params.dry_sequence_breakers) {
|
|
265
268
|
c_breakers.push_back(str.c_str());
|
|
266
269
|
}
|
|
267
|
-
|
|
268
|
-
samplers.push_back(llama_sampler_init_dry (vocab, llama_model_n_ctx_train(model), params.dry_multiplier, params.dry_base, params.dry_allowed_length, params.dry_penalty_last_n, c_breakers.data(), c_breakers.size()));
|
|
270
|
+
samplers.push_back(llama_sampler_init_dry(vocab, llama_model_n_ctx_train(model), params.dry_multiplier, params.dry_base, params.dry_allowed_length, params.dry_penalty_last_n, c_breakers.data(), c_breakers.size()));
|
|
269
271
|
}
|
|
270
272
|
break;
|
|
271
273
|
case COMMON_SAMPLER_TYPE_TOP_K:
|
|
272
|
-
samplers.push_back(llama_sampler_init_top_k
|
|
274
|
+
samplers.push_back(llama_sampler_init_top_k(params.top_k));
|
|
273
275
|
break;
|
|
274
276
|
case COMMON_SAMPLER_TYPE_TOP_P:
|
|
275
|
-
samplers.push_back(llama_sampler_init_top_p
|
|
277
|
+
samplers.push_back(llama_sampler_init_top_p(params.top_p, params.min_keep));
|
|
276
278
|
break;
|
|
277
279
|
case COMMON_SAMPLER_TYPE_TOP_N_SIGMA:
|
|
278
280
|
samplers.push_back(llama_sampler_init_top_n_sigma(params.top_n_sigma));
|
|
279
281
|
break;
|
|
280
282
|
case COMMON_SAMPLER_TYPE_MIN_P:
|
|
281
|
-
samplers.push_back(llama_sampler_init_min_p
|
|
283
|
+
samplers.push_back(llama_sampler_init_min_p(params.min_p, params.min_keep));
|
|
282
284
|
break;
|
|
283
285
|
case COMMON_SAMPLER_TYPE_XTC:
|
|
284
|
-
samplers.push_back(llama_sampler_init_xtc
|
|
286
|
+
samplers.push_back(llama_sampler_init_xtc(params.xtc_probability, params.xtc_threshold, params.min_keep, params.seed));
|
|
285
287
|
break;
|
|
286
288
|
case COMMON_SAMPLER_TYPE_TYPICAL_P:
|
|
287
|
-
samplers.push_back(llama_sampler_init_typical
|
|
289
|
+
samplers.push_back(llama_sampler_init_typical(params.typ_p, params.min_keep));
|
|
288
290
|
break;
|
|
289
291
|
case COMMON_SAMPLER_TYPE_TEMPERATURE:
|
|
290
|
-
samplers.push_back(llama_sampler_init_temp_ext
|
|
292
|
+
samplers.push_back(llama_sampler_init_temp_ext(params.temp, params.dynatemp_range, params.dynatemp_exponent));
|
|
291
293
|
break;
|
|
292
294
|
case COMMON_SAMPLER_TYPE_INFILL:
|
|
293
|
-
samplers.push_back(llama_sampler_init_infill
|
|
295
|
+
samplers.push_back(llama_sampler_init_infill(vocab));
|
|
294
296
|
break;
|
|
295
297
|
case COMMON_SAMPLER_TYPE_PENALTIES:
|
|
296
|
-
samplers.push_back(llama_sampler_init_penalties
|
|
298
|
+
samplers.push_back(llama_sampler_init_penalties(params.penalty_last_n, params.penalty_repeat, params.penalty_freq, params.penalty_present));
|
|
299
|
+
break;
|
|
300
|
+
case COMMON_SAMPLER_TYPE_ADAPTIVE_P:
|
|
301
|
+
// the `adaptive-p` sampler is like `dist` and `mirostat` in that it selects
|
|
302
|
+
// a single token, so we will add `dist` at the end of the chain by default,
|
|
303
|
+
// unless the user specifically included `adaptive-p`. we set this flag here
|
|
304
|
+
// so we know to add the sampler at the very end.
|
|
305
|
+
use_adaptive_p = true;
|
|
297
306
|
break;
|
|
298
307
|
default:
|
|
299
308
|
GGML_ASSERT(false && "unknown sampler type");
|
|
300
309
|
}
|
|
301
310
|
}
|
|
302
|
-
|
|
303
|
-
|
|
311
|
+
if (use_adaptive_p) {
|
|
312
|
+
// only if user explicitly included adaptive-p sampler
|
|
313
|
+
samplers.push_back(llama_sampler_init_adaptive_p(params.adaptive_target, params.adaptive_decay, params.seed));
|
|
314
|
+
} else {
|
|
315
|
+
// default: sample from distribution
|
|
316
|
+
samplers.push_back(llama_sampler_init_dist(params.seed));
|
|
317
|
+
}
|
|
304
318
|
} else if (params.mirostat == 1) {
|
|
305
319
|
samplers.push_back(llama_sampler_init_temp(params.temp));
|
|
306
320
|
samplers.push_back(llama_sampler_init_mirostat(llama_vocab_n_tokens(vocab), params.seed, params.mirostat_tau, params.mirostat_eta, 100));
|
|
@@ -334,15 +348,21 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, st
|
|
|
334
348
|
}
|
|
335
349
|
|
|
336
350
|
void common_sampler_free(struct common_sampler * gsmpl) {
|
|
337
|
-
if (gsmpl) {
|
|
338
|
-
|
|
339
|
-
llama_sampler_free(gsmpl->chain);
|
|
340
|
-
|
|
341
|
-
delete gsmpl;
|
|
351
|
+
if (!gsmpl) {
|
|
352
|
+
return;
|
|
342
353
|
}
|
|
354
|
+
|
|
355
|
+
llama_sampler_free(gsmpl->grmr);
|
|
356
|
+
llama_sampler_free(gsmpl->chain);
|
|
357
|
+
|
|
358
|
+
delete gsmpl;
|
|
343
359
|
}
|
|
344
360
|
|
|
345
361
|
void common_sampler_accept(struct common_sampler * gsmpl, llama_token token, bool accept_grammar) {
|
|
362
|
+
if (!gsmpl) {
|
|
363
|
+
return;
|
|
364
|
+
}
|
|
365
|
+
|
|
346
366
|
const auto tm = gsmpl->tm();
|
|
347
367
|
|
|
348
368
|
if (gsmpl->grmr && accept_grammar) {
|
|
@@ -355,6 +375,10 @@ void common_sampler_accept(struct common_sampler * gsmpl, llama_token token, boo
|
|
|
355
375
|
}
|
|
356
376
|
|
|
357
377
|
void common_sampler_reset(struct common_sampler * gsmpl) {
|
|
378
|
+
if (!gsmpl) {
|
|
379
|
+
return;
|
|
380
|
+
}
|
|
381
|
+
|
|
358
382
|
gsmpl->reset();
|
|
359
383
|
}
|
|
360
384
|
|
|
@@ -415,6 +439,10 @@ void common_perf_print(const struct llama_context * ctx, const struct common_sam
|
|
|
415
439
|
}
|
|
416
440
|
|
|
417
441
|
struct llama_sampler * common_sampler_get(const struct common_sampler * gsmpl) {
|
|
442
|
+
if (!gsmpl) {
|
|
443
|
+
return nullptr;
|
|
444
|
+
}
|
|
445
|
+
|
|
418
446
|
return gsmpl->chain;
|
|
419
447
|
}
|
|
420
448
|
|
|
@@ -611,6 +639,7 @@ char common_sampler_type_to_chr(enum common_sampler_type cnstr) {
|
|
|
611
639
|
case COMMON_SAMPLER_TYPE_XTC: return 'x';
|
|
612
640
|
case COMMON_SAMPLER_TYPE_INFILL: return 'i';
|
|
613
641
|
case COMMON_SAMPLER_TYPE_PENALTIES: return 'e';
|
|
642
|
+
case COMMON_SAMPLER_TYPE_ADAPTIVE_P: return 'a';
|
|
614
643
|
default : return '?';
|
|
615
644
|
}
|
|
616
645
|
}
|
|
@@ -627,6 +656,7 @@ std::string common_sampler_type_to_str(enum common_sampler_type cnstr) {
|
|
|
627
656
|
case COMMON_SAMPLER_TYPE_XTC: return "xtc";
|
|
628
657
|
case COMMON_SAMPLER_TYPE_INFILL: return "infill";
|
|
629
658
|
case COMMON_SAMPLER_TYPE_PENALTIES: return "penalties";
|
|
659
|
+
case COMMON_SAMPLER_TYPE_ADAPTIVE_P: return "adaptive_p";
|
|
630
660
|
default : return "";
|
|
631
661
|
}
|
|
632
662
|
}
|
|
@@ -643,6 +673,7 @@ std::vector<common_sampler_type> common_sampler_types_from_names(const std::vect
|
|
|
643
673
|
{ "xtc", COMMON_SAMPLER_TYPE_XTC },
|
|
644
674
|
{ "infill", COMMON_SAMPLER_TYPE_INFILL },
|
|
645
675
|
{ "penalties", COMMON_SAMPLER_TYPE_PENALTIES },
|
|
676
|
+
{ "adaptive_p", COMMON_SAMPLER_TYPE_ADAPTIVE_P },
|
|
646
677
|
};
|
|
647
678
|
|
|
648
679
|
// since samplers names are written multiple ways
|
|
@@ -658,6 +689,7 @@ std::vector<common_sampler_type> common_sampler_types_from_names(const std::vect
|
|
|
658
689
|
{ "typ", COMMON_SAMPLER_TYPE_TYPICAL_P },
|
|
659
690
|
{ "min-p", COMMON_SAMPLER_TYPE_MIN_P },
|
|
660
691
|
{ "temp", COMMON_SAMPLER_TYPE_TEMPERATURE },
|
|
692
|
+
{ "adaptive-p", COMMON_SAMPLER_TYPE_ADAPTIVE_P },
|
|
661
693
|
};
|
|
662
694
|
|
|
663
695
|
std::vector<common_sampler_type> samplers;
|
|
@@ -694,6 +726,7 @@ std::vector<common_sampler_type> common_sampler_types_from_chars(const std::stri
|
|
|
694
726
|
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_XTC), COMMON_SAMPLER_TYPE_XTC },
|
|
695
727
|
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_INFILL), COMMON_SAMPLER_TYPE_INFILL },
|
|
696
728
|
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_PENALTIES), COMMON_SAMPLER_TYPE_PENALTIES },
|
|
729
|
+
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_ADAPTIVE_P), COMMON_SAMPLER_TYPE_ADAPTIVE_P },
|
|
697
730
|
};
|
|
698
731
|
|
|
699
732
|
std::vector<common_sampler_type> samplers;
|