cui-llama.rn 1.2.3 → 1.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -2
- package/android/src/main/CMakeLists.txt +1 -0
- package/android/src/main/java/com/rnllama/LlamaContext.java +0 -3
- package/android/src/main/jni.cpp +9 -11
- package/cpp/common.cpp +85 -75
- package/cpp/common.h +127 -91
- package/cpp/ggml-aarch64.c +269 -0
- package/cpp/ggml-alloc.c +17 -19
- package/cpp/ggml-backend-impl.h +4 -15
- package/cpp/ggml-backend.cpp +1697 -1626
- package/cpp/ggml-backend.h +13 -25
- package/cpp/ggml-cpp.h +38 -0
- package/cpp/ggml-cpu.c +13720 -0
- package/cpp/ggml-cpu.h +150 -0
- package/cpp/ggml-impl.h +95 -0
- package/cpp/ggml-metal.m +185 -71
- package/cpp/ggml-quants.c +38 -51
- package/cpp/ggml.c +4468 -19500
- package/cpp/ggml.h +26 -146
- package/cpp/json-schema-to-grammar.cpp +1 -1
- package/cpp/llama-sampling.cpp +742 -249
- package/cpp/llama-sampling.h +21 -2
- package/cpp/llama-vocab.cpp +49 -9
- package/cpp/llama-vocab.h +35 -11
- package/cpp/llama.cpp +2468 -2307
- package/cpp/llama.h +65 -32
- package/cpp/log.cpp +50 -50
- package/cpp/log.h +18 -18
- package/cpp/rn-llama.hpp +23 -22
- package/cpp/sampling.cpp +117 -118
- package/cpp/sampling.h +20 -20
- package/cpp/sgemm.cpp +57 -0
- package/lib/commonjs/NativeRNLlama.js.map +1 -1
- package/lib/module/NativeRNLlama.js.map +1 -1
- package/lib/typescript/NativeRNLlama.d.ts +0 -1
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +0 -1
package/cpp/common.h
CHANGED
@@ -24,12 +24,12 @@
 
 #define DEFAULT_MODEL_PATH "models/7B/ggml-model-f16.gguf"
 
-struct
+struct common_lora_adapter_info {
     std::string path;
     float scale;
 };
 
-struct
+struct common_lora_adapter_container : common_lora_adapter_info {
     struct llama_lora_adapter * adapter;
 };
 
@@ -39,7 +39,7 @@ extern char const * LLAMA_COMMIT;
 extern char const * LLAMA_COMPILER;
 extern char const * LLAMA_BUILD_TARGET;
 
-struct
+struct common_control_vector_load_info;
 
 #define print_build_info() do { \
     fprintf(stderr, "%s: build = %d (%s)\n", __func__, LLAMA_BUILD_NUMBER, LLAMA_COMMIT); \
@@ -93,15 +93,17 @@ enum llama_example {
     LLAMA_EXAMPLE_COUNT,
 };
 
-enum
-
-
-
-
-
-
-
+enum common_sampler_type {
+    COMMON_SAMPLER_TYPE_NONE = 0,
+    COMMON_SAMPLER_TYPE_DRY = 1,
+    COMMON_SAMPLER_TYPE_TOP_K = 2,
+    COMMON_SAMPLER_TYPE_TOP_P = 3,
+    COMMON_SAMPLER_TYPE_MIN_P = 4,
+    //COMMON_SAMPLER_TYPE_TFS_Z = 5,
+    COMMON_SAMPLER_TYPE_TYPICAL_P = 6,
+    COMMON_SAMPLER_TYPE_TEMPERATURE = 7,
+    COMMON_SAMPLER_TYPE_XTC = 8,
+    COMMON_SAMPLER_TYPE_INFILL = 9,
 };
 
 // dimensionality reduction methods, used by cvector-generator
@@ -111,41 +113,47 @@ enum dimre_method {
 };
 
 // sampler parameters
-struct
+struct common_sampler_params {
     uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler
-
-int32_t n_prev
-int32_t n_probs
-int32_t min_keep
-int32_t top_k
-float top_p
-float min_p
-float
-float
-float
-float
-float
-float
-
-
-float
-float
-float
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+    int32_t n_prev = 64; // number of previous tokens to remember
+    int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens.
+    int32_t min_keep = 0; // 0 = disabled, otherwise samplers should return at least min_keep tokens
+    int32_t top_k = 40; // <= 0 to use vocab size
+    float top_p = 0.95f; // 1.0 = disabled
+    float min_p = 0.05f; // 0.0 = disabled
+    float xtc_probability = 0.00f; // 0.0 = disabled
+    float xtc_threshold = 0.10f; // > 0.5 disables XTC
+    float typ_p = 1.00f; // typical_p, 1.0 = disabled
+    float temp = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities
+    float dynatemp_range = 0.00f; // 0.0 = disabled
+    float dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler
+    int32_t penalty_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size)
+    float penalty_repeat = 1.00f; // 1.0 = disabled
+    float penalty_freq = 0.00f; // 0.0 = disabled
+    float penalty_present = 0.00f; // 0.0 = disabled
+    float dry_multiplier = 0.0f; // 0.0 = disabled; DRY repetition penalty for tokens extending repetition:
+    float dry_base = 1.75f; // 0.0 = disabled; multiplier * base ^ (length of sequence before token - allowed length)
+    int32_t dry_allowed_length = 2; // tokens extending repetitions beyond this receive penalty
+    int32_t dry_penalty_last_n = -1; // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size)
+    int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
+    float mirostat_tau = 5.00f; // target entropy
+    float mirostat_eta = 0.10f; // learning rate
+    bool penalize_nl = false; // consider newlines as a repeatable token
+    bool ignore_eos = false;
+    bool no_perf = false; // disable performance metrics
+
+    std::vector<std::string> dry_sequence_breakers = {"\n", ":", "\"", "*"}; // default sequence breakers for DRY
+
+
+    std::vector<enum common_sampler_type> samplers = {
+        COMMON_SAMPLER_TYPE_DRY,
+        COMMON_SAMPLER_TYPE_TOP_K,
+        COMMON_SAMPLER_TYPE_TYPICAL_P,
+        COMMON_SAMPLER_TYPE_TOP_P,
+        COMMON_SAMPLER_TYPE_MIN_P,
+        COMMON_SAMPLER_TYPE_XTC,
+        COMMON_SAMPLER_TYPE_TEMPERATURE,
     };
 
     std::string grammar; // optional BNF-like grammar to constrain sampling
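For orientation, a minimal sketch (not part of the package) of how a caller might fill in the renamed common_sampler_params above; the include path and the specific values are illustrative assumptions:

    #include "common.h"  // assumed include path for the header shown in this diff

    // Override a few defaults from common_sampler_params and trim the sampler
    // chain to top-k followed by temperature sampling.
    common_sampler_params make_sampler_params() {
        common_sampler_params sparams;
        sparams.temp           = 0.7f; // default shown above is 0.80f
        sparams.top_k          = 50;   // default shown above is 40
        sparams.penalty_last_n = 256;  // default shown above is 64
        sparams.samplers = {
            COMMON_SAMPLER_TYPE_TOP_K,
            COMMON_SAMPLER_TYPE_TEMPERATURE,
        };
        return sparams;
    }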
@@ -156,13 +164,13 @@ struct gpt_sampler_params {
     std::string print() const;
 };
 
-struct
+struct common_params {
 
     void * progress_callback_user_data = nullptr;
     llama_progress_callback progress_callback = nullptr;
     bool vocab_only = false;
     int32_t n_predict = -1; // new tokens to predict
-int32_t n_ctx =
+    int32_t n_ctx = 4096; // context size
     int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
     int32_t n_ubatch = 512; // physical batch size for prompt processing (must be >=32 to use BLAS)
     int32_t n_keep = 0; // number of tokens to keep from initial prompt
@@ -202,7 +210,7 @@ struct gpt_params {
     enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_UNSPECIFIED; // pooling type for embeddings
     enum llama_attention_type attention_type = LLAMA_ATTENTION_TYPE_UNSPECIFIED; // attention type for embeddings
 
-struct
+    struct common_sampler_params sparams;
 
     std::string model = ""; // model path // NOLINT
     std::string model_draft = ""; // draft model for speculative decoding // NOLINT
@@ -227,9 +235,9 @@ struct gpt_params {
     std::vector<llama_model_kv_override> kv_overrides;
 
     bool lora_init_without_apply = false; // only load lora to memory, but do not apply it to ctx (user can manually apply lora later using llama_lora_adapter_apply)
-std::vector<
+    std::vector<common_lora_adapter_info> lora_adapters; // lora adapter path with user defined scale
 
-std::vector<
+    std::vector<common_control_vector_load_info> control_vectors; // control vector with user defined scale
 
     int32_t verbosity = 0;
     int32_t control_vector_layer_start = -1; // layer range for control vector
@@ -287,21 +295,21 @@ struct gpt_params {
 
     // embedding
     bool embedding = false; // get only sentence embedding
-int32_t embd_normalize = 2; // normalisation for
+    int32_t embd_normalize = 2; // normalisation for embeddings (-1=none, 0=max absolute int16, 1=taxicab, 2=euclidean, >2=p-norm)
     std::string embd_out = ""; // empty = default, "array" = [[],[]...], "json" = openai style, "json+" = same "json" + cosine similarity matrix
-std::string embd_sep = "\n"; // separator of
+    std::string embd_sep = "\n"; // separator of embeddings
     bool reranking = false; // enable reranking support on server
 
     // server params
     int32_t port = 8080; // server listens on this network port
     int32_t timeout_read = 600; // http read timeout in seconds
     int32_t timeout_write = timeout_read; // http write timeout in seconds
-
+    int32_t n_threads_http = -1; // number of threads to process HTTP requests (TODO: support threadpool)
+    int32_t n_cache_reuse = 0; // min chunk size to reuse from the cache via KV shifting
 
     std::string hostname = "127.0.0.1";
     std::string public_path = ""; // NOLINT
     std::string chat_template = ""; // NOLINT
-std::string system_prompt = ""; // NOLINT
     bool enable_chat_template = true;
 
     std::vector<std::string> api_keys;
@@ -367,20 +375,31 @@ struct gpt_params {
 
 // call once at the start of a program if it uses libcommon
 // initializes the logging system and prints info about the build
-void
+void common_init();
 
-std::string
+std::string common_params_get_system_info(const common_params & params);
 
-bool parse_cpu_range(const std::string& range, bool(&boolmask)[LM_GGML_MAX_N_THREADS]);
-bool parse_cpu_mask(const std::string& mask, bool(&boolmask)[LM_GGML_MAX_N_THREADS]);
-void postprocess_cpu_params(cpu_params& cpuparams, const cpu_params* role_model = nullptr);
+bool parse_cpu_range(const std::string & range, bool(&boolmask)[LM_GGML_MAX_N_THREADS]);
+bool parse_cpu_mask(const std::string & mask, bool(&boolmask)[LM_GGML_MAX_N_THREADS]);
+void postprocess_cpu_params(cpu_params & cpuparams, const cpu_params * role_model = nullptr);
 bool set_process_priority(enum lm_ggml_sched_priority prio);
 
 //
 // String utils
 //
 
-
+#ifdef __GNUC__
+#ifdef __MINGW32__
+#define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
+#else
+#define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
+#endif
+#else
+#define LLAMA_COMMON_ATTRIBUTE_FORMAT(...)
+#endif
+
+LLAMA_COMMON_ATTRIBUTE_FORMAT(1, 2)
+std::string string_format(const char * fmt, ...);
 
 std::string string_strip(const std::string & str);
 std::string string_get_sortable_timestamp();
@@ -389,6 +408,7 @@ void string_replace_all(std::string & s, const std::string & search, const std::
 
 template<class T>
 static std::vector<T> string_split(const std::string & str, char delim) {
+    static_assert(!std::is_same<T, std::string>::value, "Please use the specialized version for std::string");
     std::vector<T> values;
     std::istringstream str_stream(str);
     std::string token;
@@ -401,6 +421,22 @@ static std::vector<T> string_split(const std::string & str, char delim) {
     return values;
 }
 
+template<>
+std::vector<std::string> string_split<std::string>(const std::string & input, char separator)
+{
+    std::vector<std::string> parts;
+    size_t begin_pos = 0;
+    size_t separator_pos = input.find(separator);
+    while (separator_pos != std::string::npos) {
+        std::string part = input.substr(begin_pos, separator_pos - begin_pos);
+        parts.emplace_back(part);
+        begin_pos = separator_pos + 1;
+        separator_pos = input.find(separator, begin_pos);
+    }
+    parts.emplace_back(input.substr(begin_pos, separator_pos - begin_pos));
+    return parts;
+}
+
 bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides);
 void string_process_escapes(std::string & input);
 
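A usage sketch for the new std::string specialization of string_split added above (the sample input is made up):

    #include <string>
    #include <vector>
    #include "common.h"  // assumed include path

    int main() {
        // "a,b,,c" -> {"a", "b", "", "c"}: empty fields are preserved, while the
        // static_assert added to the generic template steers std::string callers here.
        std::vector<std::string> fields = string_split<std::string>("a,b,,c", ',');
        return fields.size() == 4 ? 0 : 1;
    }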
@@ -423,29 +459,29 @@ std::string fs_get_cache_file(const std::string & filename);
 // Model utils
 //
 
-struct
+struct common_init_result {
     struct llama_model * model = nullptr;
     struct llama_context * context = nullptr;
-std::vector<
+    std::vector<common_lora_adapter_container> lora_adapters;
 };
 
-struct
+struct common_init_result common_init_from_params(common_params & params);
 
-struct llama_model_params
-struct llama_context_params
+struct llama_model_params common_model_params_to_llama (const common_params & params);
+struct llama_context_params common_context_params_to_llama(const common_params & params);
 struct lm_ggml_threadpool_params lm_ggml_threadpool_params_from_cpu_params(const cpu_params & params);
 
-struct llama_model *
-struct llama_model *
+struct llama_model * common_load_model_from_url(const char * model_url, const char * path_model, const char * hf_token, const struct llama_model_params & params);
+struct llama_model * common_load_model_from_hf(const char * repo, const char * file, const char * path_model, const char * hf_token, const struct llama_model_params & params);
 
 // clear LoRA adapters from context, then apply new list of adapters
-void
+void common_lora_adapters_apply(struct llama_context * ctx, std::vector<common_lora_adapter_container> & lora_adapters);
 
 // Batch utils
 
-void
+void common_batch_clear(struct llama_batch & batch);
 
-void
+void common_batch_add(
     struct llama_batch & batch,
     llama_token id,
     llama_pos pos,
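A rough sketch of the renamed initialization flow declared above, assuming a locally available GGUF model (the path is a placeholder):

    #include "common.h"  // assumed include path
    #include "llama.h"

    // common_init_from_params loads the model, creates the context and, per the
    // lora_init_without_apply default shown above, applies any listed LoRA adapters.
    bool init_example() {
        common_params params;
        params.model = "/path/to/model.gguf"; // placeholder
        params.n_ctx = 4096;

        common_init_result result = common_init_from_params(params);
        if (result.model == nullptr || result.context == nullptr) {
            return false;
        }
        // result.lora_adapters holds the loaded common_lora_adapter_container entries
        return true;
    }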
@@ -458,13 +494,13 @@ void llama_batch_add(
 
 // tokenizes a string into a vector of tokens
 // should work similar to Python's `tokenizer.encode`
-std::vector<llama_token>
+std::vector<llama_token> common_tokenize(
     const struct llama_context * ctx,
     const std::string & text,
     bool add_special,
     bool parse_special = false);
 
-std::vector<llama_token>
+std::vector<llama_token> common_tokenize(
     const struct llama_model * model,
     const std::string & text,
     bool add_special,
@@ -472,7 +508,7 @@ std::vector<llama_token> llama_tokenize(
 
 // tokenizes a token into a piece, optionally renders special/control tokens
 // should work similar to Python's `tokenizer.id_to_piece`
-std::string
+std::string common_token_to_piece(
     const struct llama_context * ctx,
     llama_token token,
     bool special = true);
@@ -480,7 +516,7 @@ std::string llama_token_to_piece(
 // detokenizes a vector of tokens into a string
 // should work similar to Python's `tokenizer.decode`
 // optionally renders special/control tokens
-std::string
+std::string common_detokenize(
     llama_context * ctx,
     const std::vector<llama_token> & tokens,
     bool special = true);
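A small round-trip sketch using the renamed tokenizer helpers declared above (ctx is assumed to come from an already initialized llama_context):

    #include <string>
    #include <vector>
    #include "common.h"  // assumed include path

    // Encode a prompt and decode it back; with the special flags set,
    // BOS/EOS and other control tokens are included and rendered.
    std::string round_trip(llama_context * ctx, const std::string & prompt) {
        std::vector<llama_token> tokens = common_tokenize(ctx, prompt, /*add_special=*/true);
        return common_detokenize(ctx, tokens, /*special=*/true);
    }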
@@ -490,31 +526,31 @@ std::string llama_detokenize(
 //
 
 // same with llama_chat_message, but uses std::string
-struct
+struct common_chat_msg {
     std::string role;
     std::string content;
 };
 
 // Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
-bool
+bool common_chat_verify_template(const std::string & tmpl);
 
 // CPP wrapper for llama_chat_apply_template
 // If the built-in template is not supported, we default to chatml
 // If the custom "tmpl" is not supported, we throw an error
-std::string
+std::string common_chat_apply_template(const struct llama_model * model,
     const std::string & tmpl,
-const std::vector<
+    const std::vector<common_chat_msg> & chat,
     bool add_ass);
 
 // Format single message, while taking into account the position of that message in chat history
-std::string
+std::string common_chat_format_single(const struct llama_model * model,
     const std::string & tmpl,
-const std::vector<
-const
+    const std::vector<common_chat_msg> & past_msg,
+    const common_chat_msg & new_msg,
     bool add_ass);
 
 // Returns an example of formatted chat
-std::string
+std::string common_chat_format_example(const struct llama_model * model,
     const std::string & tmpl);
 
 //
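And a sketch of the renamed chat-template helpers (model is assumed to be a loaded llama_model; passing an empty template string is assumed here to select the model's built-in template, with the chatml fallback the comment above describes):

    #include <string>
    #include <vector>
    #include "common.h"  // assumed include path

    // Render a short chat history and append the assistant prefix (add_ass = true).
    std::string build_prompt(const llama_model * model) {
        std::vector<common_chat_msg> chat = {
            {"system", "You are a helpful assistant."},
            {"user",   "Hello!"},
        };
        return common_chat_apply_template(model, /*tmpl=*/"", chat, /*add_ass=*/true);
    }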
@@ -522,31 +558,31 @@ std::string llama_chat_format_example(const struct llama_model * model,
 //
 
 // Dump the KV cache view with the number of sequences per cell.
-void
+void common_kv_cache_dump_view(const llama_kv_cache_view & view, int row_size = 80);
 
 // Dump the KV cache view showing individual sequences in each cell (long output).
-void
+void common_kv_cache_dump_view_seqs(const llama_kv_cache_view & view, int row_size = 40);
 
 //
 // Embedding utils
 //
 
-void
+void common_embd_normalize(const float * inp, float * out, int n, int embd_norm = 2);
 
-float
+float common_embd_similarity_cos(const float * embd1, const float * embd2, int n);
 
 //
 // Control vector utils
 //
 
-struct
+struct common_control_vector_data {
     int n_embd;
 
     // stores data for layers [1, n_layer] where n_layer = data.size() / n_embd
     std::vector<float> data;
 };
 
-struct
+struct common_control_vector_load_info {
     float strength;
 
     std::string fname;
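Finally, a minimal sketch for the embedding utilities above, assuming two raw embedding vectors of equal length obtained from an embedding pass:

    #include <vector>
    #include "common.h"  // assumed include path

    // Write euclidean-normalized copies of two embeddings into scratch buffers,
    // then compare them with the cosine-similarity helper.
    float compare(const std::vector<float> & a, const std::vector<float> & b) {
        const int n = (int) a.size();
        std::vector<float> na(n), nb(n);
        common_embd_normalize(a.data(), na.data(), n, /*embd_norm=*/2); // 2 = euclidean
        common_embd_normalize(b.data(), nb.data(), n, /*embd_norm=*/2);
        return common_embd_similarity_cos(na.data(), nb.data(), n);
    }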
@@ -554,7 +590,7 @@ struct llama_control_vector_load_info {
 
 // Load control vectors, scale each by strength, and add them together.
 // On error, returns {-1, empty}
-
+common_control_vector_data common_control_vector_load(const std::vector<common_control_vector_load_info> & load_infos);
 
 //
 // Split utils
@@ -573,5 +609,5 @@ void yaml_dump_vector_int (FILE * stream, const char * prop_name, const std
 void yaml_dump_string_multiline(FILE * stream, const char * prop_name, const char * data);
 
 void yaml_dump_non_result_info(
-FILE * stream, const
+    FILE * stream, const common_params & params, const llama_context * lctx,
     const std::string & timestamp, const std::vector<int> & prompt_tokens, const char * model_desc);