cui-llama.rn 1.0.10 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/android/src/main/java/com/rnllama/LlamaContext.java +6 -0
- package/android/src/main/jni.cpp +4 -0
- package/cpp/common.cpp +91 -9
- package/cpp/common.h +2 -2
- package/cpp/ggml-backend.c +5 -8
- package/cpp/llama-sampling.cpp +92 -0
- package/cpp/llama-sampling.h +1 -0
- package/cpp/llama-vocab.cpp +18 -3
- package/cpp/llama.cpp +436 -7
- package/cpp/llama.h +11 -0
- package/cpp/sampling.cpp +5 -1
- package/cpp/sampling.h +4 -0
- package/lib/commonjs/NativeRNLlama.js.map +1 -1
- package/lib/module/NativeRNLlama.js.map +1 -1
- package/lib/typescript/NativeRNLlama.d.ts +2 -0
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +2 -0
package/README.md
CHANGED
@@ -11,6 +11,7 @@ The following features have been added for Android:
 - `vocab_only` mode: utilize the llama.cpp tokenizer
 - tokenizeSync: non-blocking, synchronous tokenizer function
 - Context Shift taken from [kobold.cpp](https://github.com/LostRuins/koboldcpp)
+- XTC sampling
 
 Original repo README.md below.
 
package/android/src/main/java/com/rnllama/LlamaContext.java
CHANGED
@@ -218,6 +218,10 @@ public class LlamaContext {
       params.hasKey("top_p") ? (float) params.getDouble("top_p") : 0.95f,
       // float min_p,
       params.hasKey("min_p") ? (float) params.getDouble("min_p") : 0.05f,
+      // float xtc_t,
+      params.hasKey("xtc_t") ? (float) params.getDouble("xtc_t") : 0.00f,
+      // float xtc_p,
+      params.hasKey("xtc_p") ? (float) params.getDouble("xtc_p") : 0.00f,
       // float tfs_z,
       params.hasKey("tfs_z") ? (float) params.getDouble("tfs_z") : 1.00f,
       // float typical_p,
@@ -399,6 +403,8 @@ public class LlamaContext {
     int top_k,
     float top_p,
     float min_p,
+    float xtc_t,
+    float xtc_p,
     float tfs_z,
     float typical_p,
     int seed,
package/android/src/main/jni.cpp
CHANGED
@@ -370,6 +370,8 @@ Java_com_rnllama_LlamaContext_doCompletion(
     jint top_k,
     jfloat top_p,
     jfloat min_p,
+    jfloat xtc_t,
+    jfloat xtc_p,
     jfloat tfs_z,
     jfloat typical_p,
     jint seed,
@@ -413,6 +415,8 @@ Java_com_rnllama_LlamaContext_doCompletion(
     sparams.typical_p = typical_p;
     sparams.n_probs = n_probs;
     sparams.grammar = env->GetStringUTFChars(grammar, nullptr);
+    sparams.xtc_t = xtc_t;
+    sparams.xtc_p = xtc_p;
 
     sparams.logit_bias.clear();
     if (ignore_eos) {
package/cpp/common.cpp
CHANGED
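
The first hunk below introduces a family of `get_env` helpers that select an overload via `std::enable_if` based on the target field's type, plus a `gpt_params_parse_from_env` function that maps `LLAMA_ARG_*` environment variables onto `gpt_params` fields. A minimal, self-contained sketch of the same overload-dispatch pattern (standalone names, not the library's own):

```cpp
#include <cstdlib>
#include <iostream>
#include <string>
#include <type_traits>

// Overload selected when T is std::string: copy the raw value.
template <typename T>
static typename std::enable_if<std::is_same<T, std::string>::value, void>::type
env_into(const std::string & name, T & target) {
    if (const char * v = std::getenv(name.c_str())) target = v;
}

// Overload selected for non-bool integral targets: parse with stoi.
template <typename T>
static typename std::enable_if<!std::is_same<T, bool>::value && std::is_integral<T>::value, void>::type
env_into(const std::string & name, T & target) {
    if (const char * v = std::getenv(name.c_str())) target = std::stoi(v);
}

// Overload selected for bool targets: accept "1" or "true".
template <typename T>
static typename std::enable_if<std::is_same<T, bool>::value, void>::type
env_into(const std::string & name, T & target) {
    if (const char * v = std::getenv(name.c_str())) {
        const std::string s(v);
        target = (s == "1" || s == "true");
    }
}

int main() {
    std::string model = "default.gguf";
    int         n_ctx = 2048;
    bool        flash = false;
    env_into("LLAMA_ARG_MODEL",      model);  // unset variables leave the defaults untouched
    env_into("LLAMA_ARG_CTX_SIZE",   n_ctx);
    env_into("LLAMA_ARG_FLASH_ATTN", flash);
    std::cout << model << " " << n_ctx << " " << flash << "\n";
}
```

The SFINAE dispatch keeps call sites uniform: the same `env_into(name, field)` call works for string, integer, and boolean fields, and silently keeps the existing default when the variable is unset.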
@@ -83,6 +83,41 @@ char const *LLAMA_BUILD_TARGET = "unknown";
 
 using json = nlohmann::ordered_json;
 
+//
+// Environment variable utils
+//
+
+template<typename T>
+static typename std::enable_if<std::is_same<T, std::string>::value, void>::type
+get_env(std::string name, T & target) {
+    char * value = std::getenv(name.c_str());
+    target = value ? std::string(value) : target;
+}
+
+template<typename T>
+static typename std::enable_if<!std::is_same<T, bool>::value && std::is_integral<T>::value, void>::type
+get_env(std::string name, T & target) {
+    char * value = std::getenv(name.c_str());
+    target = value ? std::stoi(value) : target;
+}
+
+template<typename T>
+static typename std::enable_if<std::is_floating_point<T>::value, void>::type
+get_env(std::string name, T & target) {
+    char * value = std::getenv(name.c_str());
+    target = value ? std::stof(value) : target;
+}
+
+template<typename T>
+static typename std::enable_if<std::is_same<T, bool>::value, void>::type
+get_env(std::string name, T & target) {
+    char * value = std::getenv(name.c_str());
+    if (value) {
+        std::string val(value);
+        target = val == "1" || val == "true";
+    }
+}
+
 //
 // CPU utils
 //
@@ -116,8 +151,34 @@ int32_t cpu_get_num_physical_cores() {
     if (result == 0) {
         return num_physical_cores;
     }
-#elif defined(_WIN32)
-    //TODO:
+#elif defined(_WIN32) && (_WIN32_WINNT >= 0x0601) && !defined(__MINGW64__) // windows 7 and later
+    // TODO: windows + arm64 + mingw64
+    unsigned int n_threads_win = std::thread::hardware_concurrency();
+    unsigned int default_threads = n_threads_win > 0 ? (n_threads_win <= 4 ? n_threads_win : n_threads_win / 2) : 4;
+
+    DWORD buffer_size = 0;
+    if (!GetLogicalProcessorInformationEx(RelationProcessorCore, nullptr, &buffer_size)) {
+        if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
+            return default_threads;
+        }
+    }
+
+    std::vector<char> buffer(buffer_size);
+    if (!GetLogicalProcessorInformationEx(RelationProcessorCore, reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(buffer.data()), &buffer_size)) {
+        return default_threads;
+    }
+
+    int32_t num_physical_cores = 0;
+    PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info = reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(buffer.data());
+    while (buffer_size > 0) {
+        if (info->Relationship == RelationProcessorCore) {
+            num_physical_cores += info->Processor.GroupCount;
+        }
+        buffer_size -= info->Size;
+        info = reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(reinterpret_cast<char*>(info) + info->Size);
+    }
+
+    return num_physical_cores > 0 ? num_physical_cores : default_threads;
 #endif
     unsigned int n_threads = std::thread::hardware_concurrency();
     return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;
@@ -200,12 +261,6 @@ int32_t cpu_get_num_math() {
 // CLI argument parsing
 //
 
-void gpt_params_handle_hf_token(gpt_params & params) {
-    if (params.hf_token.empty() && std::getenv("HF_TOKEN")) {
-        params.hf_token = std::getenv("HF_TOKEN");
-    }
-}
-
 void gpt_params_handle_model_default(gpt_params & params) {
     if (!params.hf_repo.empty()) {
         // short-hand to avoid specifying --hf-file -> default it to --model
@@ -253,7 +308,9 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
 
     gpt_params_handle_model_default(params);
 
-    gpt_params_handle_hf_token(params);
+    if (params.hf_token.empty()) {
+        get_env("HF_TOKEN", params.hf_token);
+    }
 
     if (params.escape) {
         string_process_escapes(params.prompt);
@@ -273,6 +330,25 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
     return true;
 }
 
+void gpt_params_parse_from_env(gpt_params & params) {
+    // we only care about server-related params for now
+    get_env("LLAMA_ARG_MODEL", params.model);
+    get_env("LLAMA_ARG_THREADS", params.n_threads);
+    get_env("LLAMA_ARG_CTX_SIZE", params.n_ctx);
+    get_env("LLAMA_ARG_N_PARALLEL", params.n_parallel);
+    get_env("LLAMA_ARG_BATCH", params.n_batch);
+    get_env("LLAMA_ARG_UBATCH", params.n_ubatch);
+    get_env("LLAMA_ARG_N_GPU_LAYERS", params.n_gpu_layers);
+    get_env("LLAMA_ARG_THREADS_HTTP", params.n_threads_http);
+    get_env("LLAMA_ARG_CHAT_TEMPLATE", params.chat_template);
+    get_env("LLAMA_ARG_N_PREDICT", params.n_predict);
+    get_env("LLAMA_ARG_ENDPOINT_METRICS", params.endpoint_metrics);
+    get_env("LLAMA_ARG_ENDPOINT_SLOTS", params.endpoint_slots);
+    get_env("LLAMA_ARG_EMBEDDINGS", params.embedding);
+    get_env("LLAMA_ARG_FLASH_ATTN", params.flash_attn);
+    get_env("LLAMA_ARG_DEFRAG_THOLD", params.defrag_thold);
+}
+
 bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
     const auto params_org = params; // the example can modify the default params
 
@@ -1733,7 +1809,13 @@ std::string gpt_params_get_system_info(const gpt_params & params) {
     if (params.n_threads_batch != -1) {
         os << " (n_threads_batch = " << params.n_threads_batch << ")";
     }
+#if defined(_WIN32) && (_WIN32_WINNT >= 0x0601) && !defined(__MINGW64__) // windows 7 and later
+    // TODO: windows + arm64 + mingw64
+    DWORD logicalProcessorCount = GetActiveProcessorCount(ALL_PROCESSOR_GROUPS);
+    os << " / " << logicalProcessorCount << " | " << llama_print_system_info();
+#else
     os << " / " << std::thread::hardware_concurrency() << " | " << llama_print_system_info();
+#endif
 
     return os.str();
 }
package/cpp/common.h
CHANGED
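
The hunks below expose `gpt_params_parse_from_env` (replacing the old `HF_TOKEN`-only helper declaration) and add a `vocab_only` flag to `gpt_params`. A hedged usage sketch, assuming only the fields and functions named in this diff; the ordering of CLI versus environment parsing is one possible choice, not necessarily what the upstream server does:

```cpp
// Sketch only: how a host application might combine CLI parsing with the
// environment-variable overrides added in this release.
#include "common.h"

int main(int argc, char ** argv) {
    gpt_params params;

    // e.g. LLAMA_ARG_MODEL=/models/tiny.gguf LLAMA_ARG_CTX_SIZE=4096 ./app
    gpt_params_parse_from_env(params);            // environment first ...

    if (!gpt_params_parse(argc, argv, params)) {  // ... then CLI flags (illustrative order)
        return 1;
    }

    // vocab_only is the cui-llama.rn flag for tokenizer-only contexts.
    if (params.vocab_only) {
        // load only the vocabulary, skip the weights
    }
    return 0;
}
```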
@@ -81,7 +81,7 @@ enum dimre_method {
 struct gpt_params {
     uint32_t seed = LLAMA_DEFAULT_SEED; // RNG seed
 
-
+    bool vocab_only = false;
     int32_t n_threads = cpu_get_num_math();
     int32_t n_threads_draft = -1;
     int32_t n_threads_batch = -1; // number of threads to use for batch processing (-1 = use n_threads)
@@ -279,7 +279,7 @@ struct gpt_params {
     std::string lora_outfile = "ggml-lora-merged-f16.gguf";
 };
 
-void gpt_params_handle_hf_token(gpt_params & params);
+void gpt_params_parse_from_env(gpt_params & params);
 void gpt_params_handle_model_default(gpt_params & params);
 
 bool gpt_params_parse_ex (int argc, char ** argv, gpt_params & params);
package/cpp/ggml-backend.c
CHANGED
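
The hunks below drop the fixed `LM_GGML_SCHED_MAX_SPLITS` limit (previously 2048) and size the scheduler buffers from `graph_size` instead, since there can be at most one split per graph node. A rough sketch of the new sizing arithmetic with illustrative stand-in constants (not the library's actual values):

```cpp
#include <cstddef>
#include <cstdio>

// Mirrors the shape of the new computation: buffers scale with the graph
// instead of a hard-coded maximum number of splits.
int main() {
    const size_t graph_size       = 8192;  // hypothetical node count
    const size_t max_split_inputs = 10;    // stand-in for LM_GGML_SCHED_MAX_SPLIT_INPUTS
    const size_t tensor_overhead  = 368;   // stand-in for sizeof(struct lm_ggml_tensor)

    const size_t max_splits = graph_size;  // at most one split per node
    const size_t nodes_size = graph_size + max_splits * max_split_inputs * 2;
    const size_t ctx_buf_sz = max_splits * max_split_inputs * 2 * tensor_overhead; // + graph overhead in the real code

    std::printf("nodes_size=%zu context_buffer=%zu bytes\n", nodes_size, ctx_buf_sz);
}
```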
@@ -1018,10 +1018,6 @@ static bool lm_ggml_is_view_op(enum lm_ggml_op op) {
 #define LM_GGML_SCHED_MAX_BACKENDS 16
 #endif
 
-#ifndef LM_GGML_SCHED_MAX_SPLITS
-#define LM_GGML_SCHED_MAX_SPLITS 2048
-#endif
-
 #ifndef LM_GGML_SCHED_MAX_SPLIT_INPUTS
 #define LM_GGML_SCHED_MAX_SPLIT_INPUTS LM_GGML_MAX_SRC
 #endif
@@ -1125,7 +1121,8 @@ static int lm_ggml_backend_sched_backend_from_buffer(lm_ggml_backend_sched_t sch
 }
 
 #if 0
-static char causes[LM_GGML_DEFAULT_GRAPH_SIZE*16 + LM_GGML_SCHED_MAX_SPLITS*LM_GGML_SCHED_MAX_SPLIT_INPUTS][128]; // debug only
+#define LM_GGML_SCHED_MAX_SPLITS_DEBUG 4096
+static char causes[LM_GGML_DEFAULT_GRAPH_SIZE*16 + LM_GGML_SCHED_MAX_SPLITS_DEBUG*LM_GGML_SCHED_MAX_SPLIT_INPUTS][128]; // debug only
 #define SET_CAUSE(node, ...) sprintf(causes[hash_id(node)], __VA_ARGS__)
 #define GET_CAUSE(node) causes[hash_id(node)]
 #else
@@ -1549,7 +1546,6 @@ static void lm_ggml_backend_sched_split_graph(lm_ggml_backend_sched_t sched, str
                 sched->splits = realloc(sched->splits, sched->splits_capacity * sizeof(struct lm_ggml_backend_sched_split));
                 LM_GGML_ASSERT(sched->splits != NULL);
             }
-            LM_GGML_ASSERT(i_split < LM_GGML_SCHED_MAX_SPLITS);
             split = &sched->splits[i_split];
             split->backend_id = node_backend_id;
             split->i_start = i;
@@ -1865,13 +1861,14 @@ lm_ggml_backend_sched_t lm_ggml_backend_sched_new(
     sched->hv_tensor_backend_ids = malloc(sched->hash_set.size * sizeof(sched->hv_tensor_backend_ids[0]));
     sched->hv_tensor_copies      = malloc(sched->hash_set.size * sched->n_backends * sched->n_copies * sizeof(struct lm_ggml_tensor *));
 
-    const size_t nodes_size = graph_size + LM_GGML_SCHED_MAX_SPLITS*LM_GGML_SCHED_MAX_SPLIT_INPUTS*2;
+    const size_t lm_ggml_sched_max_splits = graph_size; // at most there is one split for each node in the graph
+    const size_t nodes_size = graph_size + lm_ggml_sched_max_splits*LM_GGML_SCHED_MAX_SPLIT_INPUTS*2;
     sched->node_backend_ids = calloc(nodes_size, sizeof(sched->node_backend_ids[0]));
     sched->leaf_backend_ids = calloc(nodes_size, sizeof(sched->leaf_backend_ids[0]));
     sched->prev_node_backend_ids = calloc(nodes_size, sizeof(sched->prev_node_backend_ids[0]));
     sched->prev_leaf_backend_ids = calloc(nodes_size, sizeof(sched->prev_leaf_backend_ids[0]));
 
-    sched->context_buffer_size = LM_GGML_SCHED_MAX_SPLITS*LM_GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct lm_ggml_tensor) + lm_ggml_graph_overhead_custom(graph_size, false);
+    sched->context_buffer_size = lm_ggml_sched_max_splits*LM_GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct lm_ggml_tensor) + lm_ggml_graph_overhead_custom(graph_size, false);
     sched->context_buffer = malloc(sched->context_buffer_size);
 
     const int initial_splits_capacity = 16;
package/cpp/llama-sampling.cpp
CHANGED
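
The hunk below adds `llama_sample_xtc_impl`, the core of XTC ("exclude top choices") sampling: every candidate whose logit clears `xtc_threshold` except the least likely of them is removed with probability `xtc_probability`, which pushes generation away from the single most predictable continuation. A standalone sketch of the same idea on a plain token list (hypothetical types, not the library's):

```cpp
#include <algorithm>
#include <random>
#include <vector>

struct Candidate { int token; float logit; };

// Keep the lowest-scoring candidate above the threshold; drop each of the
// other above-threshold candidates with probability p (the XTC idea).
static void xtc_filter(std::vector<Candidate> & cands, float threshold, float p,
                       size_t min_keep, std::mt19937 & rng) {
    if (threshold <= 0.0f || cands.size() < 2) return;

    std::sort(cands.begin(), cands.end(),
              [](const Candidate & a, const Candidate & b) { return a.logit > b.logit; });

    // index one past the last candidate that clears the threshold
    size_t above = 0;
    while (above < cands.size() && cands[above].logit >= threshold) ++above;
    if (above <= 1) return; // nothing to exclude: at most one "top choice"

    std::uniform_real_distribution<float> coin(0.0f, 1.0f);
    std::vector<Candidate> kept;
    for (size_t i = 0; i < cands.size(); ++i) {
        const bool removable = i + 1 < above;     // all above-threshold tokens except the last
        if (removable && coin(rng) < p) continue; // excluded on this draw
        kept.push_back(cands[i]);
    }
    if (kept.size() >= min_keep) cands.swap(kept);
}

int main() {
    std::mt19937 rng(42);
    std::vector<Candidate> cands = {{1, 9.5f}, {2, 9.1f}, {3, 8.7f}, {4, 2.0f}};
    xtc_filter(cands, /*threshold=*/8.0f, /*p=*/0.5f, /*min_keep=*/1, rng);
    return (int) cands.size();
}
```

This mirrors the sorted branch of the implementation below; the real function also has an unsorted fast path and accounts its time in `smpl->t_sample_us`.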
@@ -171,6 +171,98 @@ void llama_sample_top_p_impl(struct llama_sampling * smpl, llama_token_data_array * candidates, float p, size_t min_keep) {
     }
 }
 
+void llama_sample_xtc_impl(struct llama_sampling * smpl, llama_token_data_array * candidates, float xtc_threshold, float xtc_probability, size_t min_keep, std::mt19937 & rng) {
+    if(xtc_threshold <= 0.0f || !candidates-> size) {
+        return;
+    }
+    // TODO: xtc impl
+    bool xtc_applied = false;
+    const int64_t t_start_sample_us = lm_ggml_time_us();
+
+    // unsorted iteration
+    if (!candidates->sorted) {
+        std::vector<llama_token_data> top_tokens, low_tokens;
+
+        // split candidates into two arrays for low and high tokens
+        for (size_t i = 0; i < candidates->size; ++i) {
+            if (candidates->data[i].logit >= xtc_threshold) {
+                top_tokens.push_back(candidates->data[i]);
+            } else {
+                low_tokens.push_back(candidates-> data[i]);
+            }
+        }
+        // if there is only one or no top_tokens, do not truncate
+
+        if (top_tokens.size() <= 1) {
+            return;
+        }
+
+        // sort top_tokens
+        std::sort(top_tokens.begin(), top_tokens.end(), [](const llama_token_data & a, const llama_token_data & b) {
+            return a.logit > b.logit;
+        });
+
+        // insert top_tokens with probability. Always insert lowest top_token
+        low_tokens.push_back(top_tokens[0]);
+        std::uniform_real_distribution<float> random_float(0.0 , 1.0);
+        for (size_t i = 1; i < top_tokens.size(); ++i) {
+            if(random_float(rng) <= xtc_probability) {
+                low_tokens.push_back(top_tokens[i]);
+            }
+        }
+        if(low_tokens.size() >= min_keep) {
+            memcpy(candidates->data, low_tokens.data(), low_tokens.size()*sizeof(llama_token_data));
+            candidates->size = low_tokens.size();
+            xtc_applied = true;
+        }
+    }
+    // sorted iteration
+
+    if (!xtc_applied) {
+        // Sort the logits in descending order
+        if (!candidates->sorted) {
+            std::sort(candidates->data, candidates->data + candidates->size, [](const llama_token_data & a, const llama_token_data & b) {
+                return a.logit > b.logit;
+            });
+            candidates->sorted = true;
+        }
+
+        // find last token over threshold
+
+        size_t last_index = 0;
+
+        for (; last_index < candidates -> size; ++last_index) {
+            if(candidates -> data[last_index].logit < xtc_threshold) {
+                break;
+            }
+        }
+        last_index--;
+        // check if only 1 last index token or total less than min_keep
+        if(last_index <= 1 || candidates-> size - last_index < min_keep) {
+            return;
+        }
+        // indexes to be skipped
+        size_t safe_index = 0;
+        // remove tokens until last threshold item
+        candidates -> data;
+        std::uniform_real_distribution<float> random_float(0.0 , 1.0);
+        for (size_t i = 0; i < last_index; i++) {
+            if(random_float(rng) < xtc_probability) {
+                if(i != safe_index) {
+                    std::swap(candidates-> data[i], candidates->data[safe_index]);
+                }
+                safe_index++;
+            }
+        }
+        candidates -> data = candidates -> data + safe_index;
+        candidates -> size = candidates -> size - safe_index;
+    }
+
+    if (smpl) {
+        smpl->t_sample_us += lm_ggml_time_us() - t_start_sample_us;
+    }
+}
+
 void llama_sample_min_p_impl(struct llama_sampling * smpl, llama_token_data_array * candidates, float p, size_t min_keep) {
     if (p <= 0.0f || !candidates->size) {
         return;
package/cpp/llama-sampling.h
CHANGED
@@ -32,6 +32,7 @@ void llama_sample_tail_free_impl(struct llama_sampling * smpl, llama_token_data_array * candidates, float z, size_t min_keep);
 void llama_sample_typical_impl  (struct llama_sampling * smpl, llama_token_data_array * candidates, float p, size_t min_keep);
 void llama_sample_entropy_impl  (struct llama_sampling * smpl, llama_token_data_array * candidates, float min_temp, float max_temp, float exponent_val);
 void llama_sample_temp_impl     (struct llama_sampling * smpl, llama_token_data_array * candidates, float temp);
+void llama_sample_xtc_impl      (struct llama_sampling * smpl, llama_token_data_array * candidates, float xtc_threshold, float xtc_probability, size_t min_keep, std::mt19937 & rng);
 
 void llama_sample_repetition_penalties_impl(
         struct llama_sampling * smpl,
package/cpp/llama-vocab.cpp
CHANGED
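
The first hunk below adds `llama_priority_queue`, a thin `std::priority_queue` subclass whose `pop_move()` moves the top element out instead of copying it via `top()` + `pop()`, saving a string copy per BPE merge step. A minimal sketch of the same trick with plain strings (illustrative names only):

```cpp
#include <algorithm>
#include <iostream>
#include <queue>
#include <string>
#include <vector>

// std::priority_queue exposes its container and comparator as protected
// members `c` and `comp`, so a subclass can pop by moving instead of copying.
template <typename T, typename Container = std::vector<T>,
          typename Compare = std::less<typename Container::value_type>>
class movable_priority_queue : public std::priority_queue<T, Container, Compare> {
public:
    using std::priority_queue<T, Container, Compare>::priority_queue;

    T pop_move() {
        T item = std::move(this->c.front());                        // steal the max element
        std::pop_heap(this->c.begin(), this->c.end(), this->comp);  // move the hole to the back
        this->c.pop_back();                                         // and discard it
        return item;
    }

    void pop() = delete; // force callers through pop_move()
};

int main() {
    movable_priority_queue<std::string> q;
    q.push("alpha");
    q.push("omega");
    q.push("beta");
    while (!q.empty()) {
        std::cout << q.pop_move() << "\n"; // omega, beta, alpha
    }
}
```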
@@ -321,6 +321,21 @@ private:
 
 // TODO: there are a lot of common parts between spm and bpe tokenizers, should be refactored and reused
 
+template<typename T, typename Container = std::vector<T>, typename Compare = std::less<typename Container::value_type>>
+class llama_priority_queue : public std::priority_queue<T, Container, Compare> {
+public:
+    using std::priority_queue<T, Container, Compare>::priority_queue;
+
+    T pop_move() {
+        T item = std::move(this->c.front());
+        std::pop_heap(this->c.begin(), this->c.end(), this->comp);
+        this->c.pop_back();
+        return item;
+    }
+
+    void pop() = delete;
+};
+
 struct llm_bigram_bpe {
     struct comparator {
         bool operator()(const llm_bigram_bpe & l, const llm_bigram_bpe & r) const {
@@ -329,7 +344,7 @@ struct llm_bigram_bpe {
     };
 
     using queue_storage = std::vector<llm_bigram_bpe>;
-    using queue = std::priority_queue<llm_bigram_bpe, queue_storage, comparator>;
+    using queue = llama_priority_queue<llm_bigram_bpe, queue_storage, comparator>;
     llm_symbol::index left;
     llm_symbol::index right;
     std::string text;
@@ -388,6 +403,7 @@ struct llm_tokenizer_bpe {
         case LLAMA_VOCAB_PRE_TYPE_COMMAND_R:
         case LLAMA_VOCAB_PRE_TYPE_SMOLLM:
         case LLAMA_VOCAB_PRE_TYPE_CODESHELL:
+        case LLAMA_VOCAB_PRE_TYPE_EXAONE:
            regex_exprs = {
                "\\p{N}",
                "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
@@ -519,8 +535,7 @@ struct llm_tokenizer_bpe {
 
         // build token(s)
         while (!work_queue.empty()) {
-            auto bigram = work_queue.top();
-            work_queue.pop();
+            auto bigram = work_queue.pop_move();
 
             auto & left_symbol = symbols[bigram.left];
             auto & right_symbol = symbols[bigram.right];
package/cpp/llama.cpp
CHANGED
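
The hunks below register two new architectures (`LLM_ARCH_NEMOTRON`, `LLM_ARCH_EXAONE`), add the `ssm.dt_b_c_rms` flag used by FalconMamba-style models, tighten the quantization fallback to respect block sizes, and teach `llama_chat_apply_template_internal` the EXAONE-3 turn format. A small sketch of the turn format that the template hunk produces (standalone code, not the library's API; the real implementation also trims each message's content before wrapping it):

```cpp
#include <iostream>
#include <string>
#include <vector>

struct Turn { std::string role, content; };

// Formats turns the way the EXAONE-3 branch of the template code does:
// [|system|] / [|user|] / [|assistant|], with [|endofturn|] closing system
// and assistant turns, and a trailing "[|assistant|]" when a reply is expected.
static std::string format_exaone(const std::vector<Turn> & chat, bool add_assistant_prompt) {
    std::string out;
    for (const Turn & t : chat) {
        if (t.role == "system") {
            out += "[|system|]" + t.content + "[|endofturn|]\n";
        } else if (t.role == "user") {
            out += "[|user|]" + t.content + "\n";
        } else if (t.role == "assistant") {
            out += "[|assistant|]" + t.content + "[|endofturn|]\n";
        }
    }
    if (add_assistant_prompt) out += "[|assistant|]";
    return out;
}

int main() {
    std::vector<Turn> chat = {
        {"system", "You are a helpful assistant."},
        {"user", "Hello!"},
    };
    std::cout << format_exaone(chat, true) << "\n";
}
```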
@@ -221,6 +221,8 @@ enum llm_arch {
     LLM_ARCH_T5,
     LLM_ARCH_T5ENCODER,
     LLM_ARCH_JAIS,
+    LLM_ARCH_NEMOTRON,
+    LLM_ARCH_EXAONE,
     LLM_ARCH_UNKNOWN,
 };
 
@@ -266,6 +268,8 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_T5,              "t5"        },
     { LLM_ARCH_T5ENCODER,       "t5encoder" },
     { LLM_ARCH_JAIS,            "jais"      },
+    { LLM_ARCH_NEMOTRON,        "nemotron"  },
+    { LLM_ARCH_EXAONE,          "exaone"    },
     { LLM_ARCH_UNKNOWN,         "(unknown)" },
 };
 
@@ -335,6 +339,7 @@ enum llm_kv {
     LLM_KV_SSM_CONV_KERNEL,
     LLM_KV_SSM_STATE_SIZE,
     LLM_KV_SSM_TIME_STEP_RANK,
+    LLM_KV_SSM_DT_B_C_RMS,
 
     LLM_KV_TOKENIZER_MODEL,
     LLM_KV_TOKENIZER_PRE,
@@ -433,6 +438,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_SSM_INNER_SIZE,     "%s.ssm.inner_size"     },
     { LLM_KV_SSM_STATE_SIZE,     "%s.ssm.state_size"     },
     { LLM_KV_SSM_TIME_STEP_RANK, "%s.ssm.time_step_rank" },
+    { LLM_KV_SSM_DT_B_C_RMS,     "%s.ssm.dt_b_c_rms"     },
 
     { LLM_KV_TOKENIZER_MODEL, "tokenizer.ggml.model" },
     { LLM_KV_TOKENIZER_PRE,   "tokenizer.ggml.pre"   },
@@ -1307,6 +1313,43 @@ static const std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NAMES = {
            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
        },
    },
+   {
+       LLM_ARCH_NEMOTRON,
+       {
+           { LLM_TENSOR_TOKEN_EMBD,    "token_embd" },
+           { LLM_TENSOR_OUTPUT_NORM,   "output_norm" },
+           { LLM_TENSOR_OUTPUT,        "output" },
+           { LLM_TENSOR_ROPE_FREQS,    "rope_freqs" },
+           { LLM_TENSOR_ATTN_NORM,     "blk.%d.attn_norm" },
+           { LLM_TENSOR_ATTN_Q,        "blk.%d.attn_q" },
+           { LLM_TENSOR_ATTN_K,        "blk.%d.attn_k" },
+           { LLM_TENSOR_ATTN_V,        "blk.%d.attn_v" },
+           { LLM_TENSOR_ATTN_OUT,      "blk.%d.attn_output" },
+           { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
+           { LLM_TENSOR_FFN_NORM,      "blk.%d.ffn_norm" },
+           { LLM_TENSOR_FFN_DOWN,      "blk.%d.ffn_down" },
+           { LLM_TENSOR_FFN_UP,        "blk.%d.ffn_up" },
+       },
+   },
+   {
+       LLM_ARCH_EXAONE,
+       {
+           { LLM_TENSOR_TOKEN_EMBD,    "token_embd" },
+           { LLM_TENSOR_OUTPUT_NORM,   "output_norm" },
+           { LLM_TENSOR_OUTPUT,        "output" },
+           { LLM_TENSOR_ROPE_FREQS,    "rope_freqs" },
+           { LLM_TENSOR_ATTN_NORM,     "blk.%d.attn_norm" },
+           { LLM_TENSOR_ATTN_Q,        "blk.%d.attn_q" },
+           { LLM_TENSOR_ATTN_K,        "blk.%d.attn_k" },
+           { LLM_TENSOR_ATTN_V,        "blk.%d.attn_v" },
+           { LLM_TENSOR_ATTN_OUT,      "blk.%d.attn_output" },
+           { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
+           { LLM_TENSOR_FFN_NORM,      "blk.%d.ffn_norm" },
+           { LLM_TENSOR_FFN_GATE,      "blk.%d.ffn_gate" },
+           { LLM_TENSOR_FFN_DOWN,      "blk.%d.ffn_down" },
+           { LLM_TENSOR_FFN_UP,        "blk.%d.ffn_up" },
+       },
+   },
    {
        LLM_ARCH_UNKNOWN,
        {
@@ -2207,6 +2250,7 @@ struct llama_hparams {
     uint32_t ssm_d_inner = 0;
     uint32_t ssm_d_state = 0;
     uint32_t ssm_dt_rank = 0;
+    bool ssm_dt_b_c_rms = false;
 
     float f_clamp_kqv      = 0.0f;
     float f_max_alibi_bias = 0.0f;
@@ -2256,6 +2300,7 @@ struct llama_hparams {
         if (this->ssm_d_inner != other.ssm_d_inner) return true;
         if (this->ssm_d_state != other.ssm_d_state) return true;
         if (this->ssm_dt_rank != other.ssm_dt_rank) return true;
+        if (this->ssm_dt_b_c_rms != other.ssm_dt_b_c_rms) return true;
 
         if (this->dec_start_token_id != other.dec_start_token_id) return true;
 
@@ -5022,6 +5067,7 @@ static void llm_load_hparams(
                 ml.get_key(LLM_KV_SSM_INNER_SIZE,     hparams.ssm_d_inner);
                 ml.get_key(LLM_KV_SSM_STATE_SIZE,     hparams.ssm_d_state);
                 ml.get_key(LLM_KV_SSM_TIME_STEP_RANK, hparams.ssm_dt_rank);
+                ml.get_key(LLM_KV_SSM_DT_B_C_RMS, hparams.ssm_dt_b_c_rms, false);
 
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
 
@@ -5246,6 +5292,23 @@ static void llm_load_hparams(
                     default: model.type = e_model::MODEL_UNKNOWN;
                 }
             } break;
+        case LLM_ARCH_NEMOTRON:
+            {
+                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
+                switch (hparams.n_layer) {
+                    case 32: model.type = e_model::MODEL_4B; break;
+                    default: model.type = e_model::MODEL_UNKNOWN;
+                }
+            } break;
+        case LLM_ARCH_EXAONE:
+            {
+                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
+
+                switch (hparams.n_layer) {
+                    case 32: model.type = e_model::MODEL_8B; break;
+                    default: model.type = e_model::MODEL_UNKNOWN;
+                }
+            } break;
         default: (void)0;
     }
 
@@ -5484,6 +5547,9 @@ static void llm_load_vocab(
             } else if (
                 tokenizer_pre == "gpt3-finnish") {
                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH;
+            } else if (
+                tokenizer_pre == "exaone") {
+                vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_EXAONE;
             } else {
                 throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
             }
@@ -5857,6 +5923,7 @@ static void llm_load_print_meta(llama_model_loader & ml, llama_model & model) {
         LLAMA_LOG_INFO("%s: ssm_d_inner = %u\n", __func__, hparams.ssm_d_inner);
         LLAMA_LOG_INFO("%s: ssm_d_state = %u\n", __func__, hparams.ssm_d_state);
         LLAMA_LOG_INFO("%s: ssm_dt_rank = %u\n", __func__, hparams.ssm_dt_rank);
+        LLAMA_LOG_INFO("%s: ssm_dt_b_c_rms = %d\n", __func__, hparams.ssm_dt_b_c_rms);
     }
 
     LLAMA_LOG_INFO("%s: model type = %s\n", __func__, llama_model_type_name(model.type));
@@ -6122,9 +6189,9 @@ static bool llm_load_tensors(
                         layer.ffn_up = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff});
 
                         // optional MLP bias
-                        layer.ffn_gate_b = ml.create_tensor(
-                        layer.ffn_down_b = ml.create_tensor(
-                        layer.ffn_up_b   = ml.create_tensor(
+                        layer.ffn_gate_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_GATE, "bias", i), {n_ff}, llama_model_loader::TENSOR_NOT_REQUIRED);
+                        layer.ffn_down_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_DOWN, "bias", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
+                        layer.ffn_up_b   = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_UP, "bias", i), {n_ff}, llama_model_loader::TENSOR_NOT_REQUIRED);
                     } else {
                         layer.ffn_gate_inp = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_GATE_INP, "weight", i), {n_embd, n_expert});
 
@@ -6448,7 +6515,7 @@ static bool llm_load_tensors(
                         layer.bv = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_V, "bias", i), {n_embd_gqa});
 
                         layer.wo = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}); //output_dens
-                        layer.bo = ml.create_tensor(
+                        layer.bo = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT, "bias", i), {n_embd}); //output_dens
 
                         layer.attn_out_norm   = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT_NORM, "weight", i), {n_embd}); //output_norm
                         layer.attn_out_norm_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT_NORM, "bias", i), {n_embd});
@@ -7579,6 +7646,78 @@ static bool llm_load_tensors(
                        layer.ffn_down = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN, "weight", i), {n_ff, n_embd});
                    }
                } break;
+           case LLM_ARCH_NEMOTRON:
+               {
+                   model.tok_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab});
+
+                   // output
+                   {
+                       model.output_norm   = ml.create_tensor(ctx_output, tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd});
+                       model.output_norm_b = ml.create_tensor(ctx_output, tn(LLM_TENSOR_OUTPUT_NORM, "bias"), {n_embd});
+                       model.output        = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab});
+                   }
+
+                   for (int i = 0; i < n_layer; ++i) {
+                       lm_ggml_context * ctx_layer = ctx_for_layer(i);
+                       lm_ggml_context * ctx_split = ctx_for_layer_split(i);
+
+                       auto & layer = model.layers[i];
+
+                       layer.attn_norm   = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd});
+                       layer.attn_norm_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_NORM, "bias", i), {n_embd});
+
+                       layer.wq = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_Q, "weight", i), {n_embd, n_embd});
+                       layer.wk = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_K, "weight", i), {n_embd, n_embd_gqa});
+                       layer.wv = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_V, "weight", i), {n_embd, n_embd_gqa});
+                       layer.wo = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd});
+
+                       // optional bias tensors
+                       layer.bq = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_Q, "bias", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
+                       layer.bk = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_K, "bias", i), {n_embd_gqa}, llama_model_loader::TENSOR_NOT_REQUIRED);
+                       layer.bv = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_V, "bias", i), {n_embd_gqa}, llama_model_loader::TENSOR_NOT_REQUIRED);
+                       layer.bo = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT, "bias", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
+
+                       layer.ffn_norm   = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd});
+                       layer.ffn_norm_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_NORM, "bias", i), {n_embd});
+
+                       layer.ffn_down = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd});
+                       layer.ffn_up   = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP,   "weight", i), {n_embd, n_ff});
+
+                       // optional MLP bias
+                       layer.ffn_down_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_DOWN, "bias", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
+                       layer.ffn_up_b   = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_UP,   "bias", i), {n_ff}, llama_model_loader::TENSOR_NOT_REQUIRED);
+                   }
+               } break;
+           case LLM_ARCH_EXAONE:
+               {
+                   model.tok_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab});
+
+                   // output
+                   {
+                       model.output_norm = ml.create_tensor(ctx_output, tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd});
+                       model.output      = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab});
+                   }
+
+                   for (int i = 0; i < n_layer; ++i) {
+                       lm_ggml_context * ctx_layer = ctx_for_layer(i);
+                       lm_ggml_context * ctx_split = ctx_for_layer_split(i);
+
+                       auto & layer = model.layers[i];
+
+                       layer.attn_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd});
+
+                       layer.wq = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_Q, "weight", i), {n_embd, n_embd_head_k * n_head});
+                       layer.wk = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_K, "weight", i), {n_embd, n_embd_k_gqa});
+                       layer.wv = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_V, "weight", i), {n_embd, n_embd_v_gqa});
+                       layer.wo = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd_head_k * n_head, n_embd});
+
+                       layer.ffn_norm   = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd});
+                       layer.rope_freqs = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ROPE_FREQS, "weight"), {n_embd/n_head/2}, llama_model_loader::TENSOR_NOT_REQUIRED | (i != 0 ? llama_model_loader::TENSOR_DUPLICATED : 0));
+                       layer.ffn_gate   = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff});
+                       layer.ffn_down   = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd});
+                       layer.ffn_up     = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP,   "weight", i), {n_embd, n_ff});
+                   }
+               } break;
            default:
                throw std::runtime_error("unknown architecture");
        }
@@ -8265,7 +8404,7 @@ static struct lm_ggml_tensor * llm_build_kqv(
     struct lm_ggml_tensor * kq = lm_ggml_mul_mat(ctx, k, q);
     cb(kq, "kq", il);
 
-    if (model.arch == LLM_ARCH_PHI2 || model.arch == LLM_ARCH_PHI3 || model.arch == LLM_ARCH_GPTNEOX || model.arch == LLM_ARCH_QWEN2) {
+    if (model.arch == LLM_ARCH_PHI2 || model.arch == LLM_ARCH_PHI3 || model.arch == LLM_ARCH_GPTNEOX || model.arch == LLM_ARCH_QWEN2 || model.arch == LLM_ARCH_NEMOTRON) {
         // for this arch, we need to perform the KQ multiplication with F32 precision, otherwise we get NaNs
         // ref: https://github.com/ggerganov/llama.cpp/pull/4490#issuecomment-1859055847
         lm_ggml_mul_mat_set_prec(kq, LM_GGML_PREC_F32);
@@ -12039,6 +12178,10 @@ struct llm_build_context {
         LM_GGML_ASSERT(2 * d_model == d_inner);
         const int64_t d_state = hparams.ssm_d_state;
         const int64_t dt_rank = hparams.ssm_dt_rank;
+        // Some variants of Mamba arch (e.g. FalconMamba do apply layer norm on B and Dt layers)
+        const bool ssm_dt_b_c_rms = hparams.ssm_dt_b_c_rms;
+        // Use the same RMS norm as the final layer norm
+        const float norm_rms_eps = hparams.f_norm_rms_eps;
 
         struct lm_ggml_tensor * cur;
         struct lm_ggml_tensor * inpL;
@@ -12119,6 +12262,13 @@ struct llm_build_context {
             struct lm_ggml_tensor * B = lm_ggml_view_2d(ctx0, x_db, d_state, n_tokens, x_db->nb[1], lm_ggml_element_size(x_db)*dt_rank);
             struct lm_ggml_tensor * C = lm_ggml_view_2d(ctx0, x_db, d_state, n_tokens, x_db->nb[1], lm_ggml_element_size(x_db)*(dt_rank+d_state));
 
+            // Some Mamba variants (e.g. FalconMamba) apply RMS norm in B, C & Dt layers
+            if (ssm_dt_b_c_rms) {
+                dt = lm_ggml_rms_norm(ctx0, dt, norm_rms_eps);
+                B = lm_ggml_rms_norm(ctx0, B, norm_rms_eps);
+                C = lm_ggml_rms_norm(ctx0, C, norm_rms_eps);
+            }
+
             // {dt_rank, d_inner} * {dt_rank, n_tokens} => {d_inner, n_tokens}
             dt = llm_build_lora_mm(lctx, ctx0, model.layers[il].ssm_dt, dt);
             dt = lm_ggml_add(ctx0, dt, model.layers[il].ssm_dt_b);
@@ -13766,6 +13916,254 @@ struct llm_build_context {
 
         return gf;
     }
+
+    struct lm_ggml_cgraph * build_nemotron() {
+        struct lm_ggml_cgraph * gf = lm_ggml_new_graph_custom(ctx0, llama_model_max_nodes(model), false);
+
+        const int64_t n_embd_head = hparams.n_embd_head_v;
+        LM_GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
+        //LM_GGML_ASSERT(n_embd_head == hparams.n_rot);
+
+        struct lm_ggml_tensor * cur;
+        struct lm_ggml_tensor * inpL;
+
+        inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb);
+
+        // inp_pos - contains the positions
+        struct lm_ggml_tensor * inp_pos = build_inp_pos();
+
+        // KQ_mask (mask for 1 head, it will be broadcasted to all heads)
+        struct lm_ggml_tensor * KQ_mask = build_inp_KQ_mask();
+
+        for (int il = 0; il < n_layer; ++il) {
+            struct lm_ggml_tensor * inpSA = inpL;
+
+            // norm
+            cur = llm_build_norm(ctx0, inpL, hparams,
+                    model.layers[il].attn_norm,
+                    model.layers[il].attn_norm_b,
+                    LLM_NORM, cb, il);
+            cb(cur, "attn_norm", il);
+
+            // self-attention
+            {
+                // compute Q and K and RoPE them
+                struct lm_ggml_tensor * Qcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wq, cur);
+                cb(Qcur, "Qcur", il);
+                if (model.layers[il].bq) {
+                    Qcur = lm_ggml_add(ctx0, Qcur, model.layers[il].bq);
+                    cb(Qcur, "Qcur", il);
+                }
+
+                struct lm_ggml_tensor * Kcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wk, cur);
+                cb(Kcur, "Kcur", il);
+                if (model.layers[il].bk) {
+                    Kcur = lm_ggml_add(ctx0, Kcur, model.layers[il].bk);
+                    cb(Kcur, "Kcur", il);
+                }
+
+                struct lm_ggml_tensor * Vcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wv, cur);
+                cb(Vcur, "Vcur", il);
+                if (model.layers[il].bv) {
+                    Vcur = lm_ggml_add(ctx0, Vcur, model.layers[il].bv);
+                    cb(Vcur, "Vcur", il);
+                }
+
+                Qcur = lm_ggml_rope_ext(
+                    ctx0, lm_ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos, nullptr,
+                    n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
+                    ext_factor, attn_factor, beta_fast, beta_slow
+                );
+                cb(Qcur, "Qcur", il);
+
+                Kcur = lm_ggml_rope_ext(
+                    ctx0, lm_ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos, nullptr,
+                    n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
+                    ext_factor, attn_factor, beta_fast, beta_slow
+                );
+                cb(Kcur, "Kcur", il);
+
+                cur = llm_build_kv(ctx0, lctx, kv_self, gf,
+                        model.layers[il].wo, model.layers[il].bo,
+                        Kcur, Vcur, Qcur, KQ_mask, n_tokens, kv_head, n_kv, 1.0f/sqrtf(float(n_embd_head)), cb, il);
+            }
+
+            if (il == n_layer - 1) {
+                // skip computing output for unused tokens
+                struct lm_ggml_tensor * inp_out_ids = build_inp_out_ids();
+                cur   = lm_ggml_get_rows(ctx0,   cur, inp_out_ids);
+                inpSA = lm_ggml_get_rows(ctx0, inpSA, inp_out_ids);
+            }
+
+            struct lm_ggml_tensor * ffn_inp = lm_ggml_add(ctx0, cur, inpSA);
+            cb(ffn_inp, "ffn_inp", il);
+
+            // feed-forward network
+            cur = llm_build_norm(ctx0, ffn_inp, hparams,
+                    model.layers[il].ffn_norm,
+                    model.layers[il].ffn_norm_b,
+                    LLM_NORM, cb, il);
+            cb(cur, "ffn_norm", il);
+
+            cur = llm_build_ffn(ctx0, lctx, cur,
+                    model.layers[il].ffn_up,   model.layers[il].ffn_up_b,   NULL,
+                    NULL,                      NULL,                        NULL,
+                    model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL,
+                    NULL,
+                    LLM_FFN_RELU_SQR, LLM_FFN_SEQ, cb, il);
+
+            cur = lm_ggml_add(ctx0, cur, ffn_inp);
+            cb(cur, "ffn_out", il);
+
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
+            cb(cur, "l_out", il);
+
+            // input for next layer
+            inpL = cur;
+        }
+
+        cur = inpL;
+
+        cur = llm_build_norm(ctx0, cur, hparams,
+                model.output_norm, model.output_norm_b,
+                LLM_NORM, cb, -1);
+        cb(cur, "result_norm", -1);
+
+        // lm_head
+        cur = llm_build_lora_mm(lctx, ctx0, model.output, cur);
+        cb(cur, "result_output", -1);
+
+        lm_ggml_build_forward_expand(gf, cur);
+
+        return gf;
+    }
+
+    struct lm_ggml_cgraph * build_exaone() {
+        struct lm_ggml_cgraph * gf = lm_ggml_new_graph_custom(ctx0, llama_model_max_nodes(model), false);
+
+        // mutable variable, needed during the last layer of the computation to skip unused tokens
+        int32_t n_tokens = this->n_tokens;
+
+        const int64_t n_embd_head = hparams.n_embd_head_v;
+        LM_GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
+        LM_GGML_ASSERT(n_embd_head == hparams.n_rot);
+
+        struct lm_ggml_tensor * cur;
+        struct lm_ggml_tensor * inpL;
+
+        inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb);
+
+        // inp_pos - contains the positions
+        struct lm_ggml_tensor * inp_pos = build_inp_pos();
+
+        // KQ_mask (mask for 1 head, it will be broadcasted to all heads)
+        struct lm_ggml_tensor * KQ_mask = build_inp_KQ_mask();
+
+        for (int il = 0; il < n_layer; ++il) {
+            struct lm_ggml_tensor * inpSA = inpL;
+
+            // norm
+            cur = llm_build_norm(ctx0, inpL, hparams,
+                    model.layers[il].attn_norm, NULL,
+                    LLM_NORM_RMS, cb, il);
+            cb(cur, "attn_norm", il);
+
+            // self-attention
+            {
+                // rope freq factors for llama3; may return nullptr for llama2 and other models
+                struct lm_ggml_tensor * rope_factors = build_rope_factors(il);
+
+                // compute Q and K and RoPE them
+                struct lm_ggml_tensor * Qcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wq, cur);
+                cb(Qcur, "Qcur", il);
+                if (model.layers[il].bq) {
+                    Qcur = lm_ggml_add(ctx0, Qcur, model.layers[il].bq);
+                    cb(Qcur, "Qcur", il);
+                }
+
+                struct lm_ggml_tensor * Kcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wk, cur);
+                cb(Kcur, "Kcur", il);
+                if (model.layers[il].bk) {
+                    Kcur = lm_ggml_add(ctx0, Kcur, model.layers[il].bk);
+                    cb(Kcur, "Kcur", il);
+                }
+
+                struct lm_ggml_tensor * Vcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wv, cur);
+                cb(Vcur, "Vcur", il);
+                if (model.layers[il].bv) {
+                    Vcur = lm_ggml_add(ctx0, Vcur, model.layers[il].bv);
+                    cb(Vcur, "Vcur", il);
+                }
+
+                Qcur = lm_ggml_rope_ext(
+                    ctx0, lm_ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos, rope_factors,
+                    n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
+                    ext_factor, attn_factor, beta_fast, beta_slow
+                );
+                cb(Qcur, "Qcur", il);
+
+                Kcur = lm_ggml_rope_ext(
+                    ctx0, lm_ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos, rope_factors,
+                    n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
+                    ext_factor, attn_factor, beta_fast, beta_slow
+                );
+                cb(Kcur, "Kcur", il);
+
+                cur = llm_build_kv(ctx0, lctx, kv_self, gf,
+                        model.layers[il].wo, model.layers[il].bo,
+                        Kcur, Vcur, Qcur, KQ_mask, n_tokens, kv_head, n_kv, 1.0f/sqrtf(float(n_embd_head)), cb, il);
+            }
+
+            if (il == n_layer - 1) {
+                // skip computing output for unused tokens
+                struct lm_ggml_tensor * inp_out_ids = build_inp_out_ids();
+                n_tokens = n_outputs;
+                cur   = lm_ggml_get_rows(ctx0,   cur, inp_out_ids);
+                inpSA = lm_ggml_get_rows(ctx0, inpSA, inp_out_ids);
+            }
+
+            struct lm_ggml_tensor * ffn_inp = lm_ggml_add(ctx0, cur, inpSA);
+            cb(ffn_inp, "ffn_inp", il);
+
+            // feed-forward network
+            cur = llm_build_norm(ctx0, ffn_inp, hparams,
+                    model.layers[il].ffn_norm, NULL,
+                    LLM_NORM_RMS, cb, il);
+            cb(cur, "ffn_norm", il);
+
+            cur = llm_build_ffn(ctx0, lctx, cur,
+                    model.layers[il].ffn_up,   NULL, NULL,
+                    model.layers[il].ffn_gate, NULL, NULL,
+                    model.layers[il].ffn_down, NULL, NULL,
+                    NULL,
+                    LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
+            cb(cur, "ffn_out", il);
+
+            cur = lm_ggml_add(ctx0, cur, ffn_inp);
+            cb(cur, "ffn_out", il);
+
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
+            cb(cur, "l_out", il);
+
+            // input for next layer
+            inpL = cur;
+        }
+
+        cur = inpL;
+
+        cur = llm_build_norm(ctx0, cur, hparams,
+                model.output_norm, NULL,
+                LLM_NORM_RMS, cb, -1);
+        cb(cur, "result_norm", -1);
+
+        // lm_head
+        cur = llm_build_lora_mm(lctx, ctx0, model.output, cur);
+        cb(cur, "result_output", -1);
+
+        lm_ggml_build_forward_expand(gf, cur);
+
+        return gf;
+    }
 };
 
 static struct lm_ggml_cgraph * llama_build_graph_defrag(llama_context & lctx, const std::vector<uint32_t> & ids) {
@@ -14021,6 +14419,14 @@ static struct lm_ggml_cgraph * llama_build_graph(
             {
                 result = llm.build_jais();
             } break;
+        case LLM_ARCH_NEMOTRON:
+            {
+                result = llm.build_nemotron();
+            } break;
+        case LLM_ARCH_EXAONE:
+            {
+                result = llm.build_exaone();
+            } break;
         default:
             LM_GGML_ABORT("fatal error");
     }
@@ -15727,6 +16133,9 @@ static lm_ggml_type llama_tensor_get_type(quantize_state_internal & qs, lm_ggml_
             case LM_GGML_TYPE_Q6_K: new_type = LM_GGML_TYPE_Q8_0; break;
             default: throw std::runtime_error("\nUnsupported tensor size encountered\n");
         }
+        if (tensor->ne[0] % lm_ggml_blck_size(new_type) != 0) {
+            new_type = LM_GGML_TYPE_F16;
+        }
         LLAMA_LOG_WARN(" - using fallback quantization %s\n", lm_ggml_type_name(new_type));
         ++qs.n_fallback;
     }
@@ -16055,8 +16464,6 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         // do not quantize Mamba's small yet 2D weights
         // NOTE: can't use LLM_TN here because the layer number is not known
         quantize &= name.find("ssm_conv1d.weight") == std::string::npos;
-        quantize &= name.find("ssm_x.weight") == std::string::npos;
-        quantize &= name.find("ssm_dt.weight") == std::string::npos;
 
         // do not quantize relative position bias (T5)
         quantize &= name.find("attn_rel_b.weight") == std::string::npos;
@@ -17091,6 +17498,8 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) {
         case LLM_ARCH_OPENELM:
         case LLM_ARCH_GPTNEOX:
         case LLM_ARCH_CODESHELL:
+        case LLM_ARCH_NEMOTRON:
+        case LLM_ARCH_EXAONE:
             return LLAMA_ROPE_TYPE_NEOX;
 
         // all model arches should be listed explicitly here
@@ -19021,6 +19430,22 @@ static int32_t llama_chat_apply_template_internal(
         if (add_ass) {
             ss << "Assistant:";
         }
+    } else if (tmpl == "exaone3" || (tmpl_contains("[|system|]") && tmpl_contains("[|assistant|]") && tmpl_contains("[|endofturn|]"))) {
+        // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
+        // EXAONE-3.0-7.8B-Instruct
+        for (auto message : chat) {
+            std::string role(message->role);
+            if (role == "system") {
+                ss << "[|system|]" << trim(message->content) << "[|endofturn|]\n";
+            } else if (role == "user") {
+                ss << "[|user|]" << trim(message->content) << "\n";
+            } else if (role == "assistant") {
+                ss << "[|assistant|]" << trim(message->content) << "[|endofturn|]\n";
+            }
+        }
+        if (add_ass) {
+            ss << "[|assistant|]";
+        }
     } else {
         // template not supported
         return -1;
@@ -19134,6 +19559,10 @@ void llama_sample_min_p(struct llama_context * ctx, llama_token_data_array * candidates, float p, size_t min_keep) {
     llama_sample_min_p_impl(ctx ? &ctx->sampling : nullptr, candidates, p, min_keep);
 }
 
+void llama_sample_xtc(struct llama_context * ctx, llama_token_data_array * candidates, float xtc_threshold, float xtc_probability, size_t min_keep, std::mt19937 rng){
+    llama_sample_xtc_impl(ctx ? &ctx-> sampling: nullptr, candidates, xtc_threshold, xtc_probability, min_keep, rng);
+}
+
 void llama_sample_tail_free(struct llama_context * ctx, llama_token_data_array * candidates, float z, size_t min_keep) {
     llama_sample_tail_free_impl(ctx ? &ctx->sampling : nullptr, candidates, z, min_keep);
 }
package/cpp/llama.h
CHANGED
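
The hunks below add `<random>` to the public header, a `LLAMA_VOCAB_PRE_TYPE_EXAONE` pre-tokenizer id, and the `llama_sample_xtc` entry point. A hedged usage sketch, assuming a populated `llama_token_data_array` named `cur_p` and a valid `ctx` (setup omitted); the parameter values are illustrative only:

```cpp
// Sketch: applying XTC before the final token draw, using the new API below.
#include <random>
#include "llama.h"

void apply_xtc(struct llama_context * ctx, llama_token_data_array * cur_p, std::mt19937 & rng) {
    const float  xtc_threshold   = 0.1f; // cutoff for "top choices" (example value)
    const float  xtc_probability = 0.5f; // chance of dropping each removable top choice (example value)
    const size_t min_keep        = 1;    // never truncate below this many candidates

    // Note: per the declaration added in this diff, the rng is passed by value.
    llama_sample_xtc(ctx, cur_p, xtc_threshold, xtc_probability, min_keep, rng);
}
```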
@@ -8,6 +8,7 @@
 #include <stdint.h>
 #include <stdio.h>
 #include <stdbool.h>
+#include <random>
 
 #ifdef LLAMA_SHARED
 #    if defined(_WIN32) && !defined(__MINGW32__)
@@ -95,6 +96,7 @@ extern "C" {
         LLAMA_VOCAB_PRE_TYPE_CODESHELL      = 22,
         LLAMA_VOCAB_PRE_TYPE_BLOOM          = 23,
         LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH   = 24,
+        LLAMA_VOCAB_PRE_TYPE_EXAONE         = 25,
     };
 
     enum llama_rope_type {
@@ -1084,6 +1086,15 @@ extern "C" {
             float   p,
             size_t  min_keep);
 
+    /// @details XTC sampling
+    LLAMA_API void llama_sample_xtc(
+            struct llama_context * ctx,
+          llama_token_data_array * candidates,
+                           float   xtc_threshold,
+                           float   xtc_probability,
+                          size_t   min_keep,
+                    std::mt19937   rng);
+
     /// @details Tail Free Sampling described in https://www.trentonbricken.com/Tail-Free-Sampling/.
     LLAMA_API void llama_sample_tail_free(
             struct llama_context * ctx,
package/cpp/sampling.cpp
CHANGED
@@ -229,6 +229,7 @@ std::vector<llama_sampler_type> llama_sampling_types_from_chars(const std::string & names_string) {
 // no reasons to expose this function in header
 static void sampler_queue(
                    struct llama_context * ctx_main,
+          struct llama_sampling_context * ctx_sampling,
            const llama_sampling_params & params,
                  llama_token_data_array & cur_p,
                                  size_t   min_keep) {
@@ -238,6 +239,8 @@ static void sampler_queue(
     const int32_t top_k     = params.top_k;
     const float   top_p     = params.top_p;
     const float   min_p     = params.min_p;
+    const float   xtc_t     = params.xtc_t;
+    const float   xtc_p     = params.xtc_p;
     const float   tfs_z     = params.tfs_z;
     const float   typical_p = params.typical_p;
     const std::vector<llama_sampler_type> & samplers_sequence = params.samplers_sequence;
@@ -249,6 +252,7 @@ static void sampler_queue(
             case llama_sampler_type::TYPICAL_P: llama_sample_typical (ctx_main, &cur_p, typical_p, min_keep); break;
             case llama_sampler_type::TOP_P    : llama_sample_top_p   (ctx_main, &cur_p, top_p,     min_keep); break;
             case llama_sampler_type::MIN_P    : llama_sample_min_p   (ctx_main, &cur_p, min_p,     min_keep); break;
+            case llama_sampler_type::XTC      : llama_sample_xtc     (ctx_main, &cur_p, xtc_t, xtc_p, min_keep, ctx_sampling->rng); break;
             case llama_sampler_type::TEMPERATURE:
                 if (dynatemp_range > 0) {
                     float dynatemp_min = std::max(0.0f, temp - dynatemp_range);
@@ -302,7 +306,7 @@ static llama_token llama_sampling_sample_impl(
     // temperature sampling
     size_t min_keep = std::max(1, params.min_keep);
 
-    sampler_queue(ctx_main, params, cur_p, min_keep);
+    sampler_queue(ctx_main, ctx_sampling, params, cur_p, min_keep);
 
     id = llama_sample_token_with_rng(ctx_main, &cur_p, ctx_sampling->rng);
 
package/cpp/sampling.h
CHANGED
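
The hunks below give XTC the sampler character 'x', add `xtc_t` / `xtc_p` to `llama_sampling_params`, and slot XTC into the default sampler order between min-p and temperature. A short sketch of the relevant knobs with a hypothetical config struct that mirrors the defaults added in this release:

```cpp
#include <cstdint>

// Hypothetical mirror of the sampling knobs relevant to XTC; the defaults
// below match the ones added to llama_sampling_params in this diff.
struct sampling_config {
    int32_t top_k = 40;
    float   top_p = 0.95f;
    float   min_p = 0.05f;
    float   xtc_t = 0.0f;   // XTC threshold, 0.0 = disabled
    float   xtc_p = 0.0f;   // probability of removing each "top choice"
    float   temp  = 0.80f;
};

int main() {
    sampling_config cfg;
    cfg.xtc_t = 0.1f; // enable XTC: tokens above this threshold become removable
    cfg.xtc_p = 0.5f; // each removable token is dropped with 50% probability
    return cfg.xtc_t > 0.0f ? 0 : 1;
}
```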
@@ -14,6 +14,7 @@ enum class llama_sampler_type : char {
     TOP_K       = 'k',
     TOP_P       = 'p',
     MIN_P       = 'm',
+    XTC         = 'x',
     TFS_Z       = 'f',
     TYPICAL_P   = 'y',
     TEMPERATURE = 't'
@@ -27,6 +28,8 @@ typedef struct llama_sampling_params {
     int32_t top_k             = 40;    // <= 0 to use vocab size
     float   top_p             = 0.95f; // 1.0 = disabled
     float   min_p             = 0.05f; // 0.0 = disabled
+    float   xtc_t             = 0.0f;  // 0.0 = disabled
+    float   xtc_p             = 0.0f;  // controls the probability of XTC removal
     float   tfs_z             = 1.00f; // 1.0 = disabled
     float   typical_p         = 1.00f; // 1.0 = disabled
     float   temp              = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities
@@ -48,6 +51,7 @@ typedef struct llama_sampling_params {
         llama_sampler_type::TYPICAL_P,
         llama_sampler_type::TOP_P,
         llama_sampler_type::MIN_P,
+        llama_sampler_type::XTC,
         llama_sampler_type::TEMPERATURE
     };
 
package/lib/commonjs/NativeRNLlama.js.map
CHANGED
(generated source map; minified mappings omitted)
package/lib/module/NativeRNLlama.js.map
CHANGED
(generated source map; minified mappings omitted)
package/lib/typescript/NativeRNLlama.d.ts.map
CHANGED
(generated source map; minified mappings omitted)
package/package.json
CHANGED