cui-llama.rn 1.0.1 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -1
- package/android/src/main/CMakeLists.txt +22 -19
- package/android/src/main/java/com/rnllama/LlamaContext.java +62 -20
- package/cpp/common.cpp +4 -11
- package/cpp/common.h +1 -1
- package/cpp/ggml-aarch64.c +2193 -2193
- package/cpp/ggml-aarch64.h +39 -39
- package/cpp/ggml-alloc.c +1042 -1041
- package/cpp/ggml-backend-impl.h +153 -153
- package/cpp/ggml-backend.c +2234 -2225
- package/cpp/ggml-backend.h +238 -236
- package/cpp/ggml-common.h +1829 -1829
- package/cpp/ggml-impl.h +655 -655
- package/cpp/ggml-metal.h +65 -65
- package/cpp/ggml-metal.m +3269 -3273
- package/cpp/ggml-quants.c +14860 -15022
- package/cpp/ggml-quants.h +132 -132
- package/cpp/ggml.c +16 -6
- package/cpp/ggml.h +2447 -2444
- package/cpp/llama.cpp +634 -531
- package/cpp/llama.h +30 -14
- package/cpp/log.h +737 -737
- package/cpp/rn-llama.hpp +9 -1
- package/cpp/sampling.cpp +460 -460
- package/cpp/sgemm.cpp +1027 -1027
- package/cpp/sgemm.h +14 -14
- package/package.json +1 -1
package/cpp/llama.h CHANGED
@@ -40,7 +40,7 @@
 #define LLAMA_FILE_MAGIC_GGSQ 0x67677371u // 'ggsq'
 
 #define LLAMA_SESSION_MAGIC   LLAMA_FILE_MAGIC_GGSN
-#define LLAMA_SESSION_VERSION
+#define LLAMA_SESSION_VERSION 7
 
 #define LLAMA_STATE_SEQ_MAGIC   LLAMA_FILE_MAGIC_GGSQ
 #define LLAMA_STATE_SEQ_VERSION 1
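The version bump means session files written by earlier builds of this package no longer load. A minimal sketch of the fallback callers typically need, assuming the `llama_state_load_file` API declared elsewhere in this same header (the file name "state.bin" and the helper are placeholders, not part of the diff):

```c
#include "llama.h"

// Hypothetical helper: resume from a saved session if the file is still
// compatible. llama_state_load_file() returns false when the file's magic
// or LLAMA_SESSION_VERSION stamp does not match the running build.
static bool try_resume(struct llama_context * ctx,
                       llama_token * tokens, size_t capacity, size_t * n_loaded) {
    if (!llama_state_load_file(ctx, "state.bin", tokens, capacity, n_loaded)) {
        *n_loaded = 0; // stale or missing session: caller re-tokenizes the prompt
        return false;
    }
    return true;
}
```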
@@ -92,6 +92,9 @@ extern "C" {
|
|
92
92
|
LLAMA_VOCAB_PRE_TYPE_CHATGLM4 = 17,
|
93
93
|
LLAMA_VOCAB_PRE_TYPE_VIKING = 18,
|
94
94
|
LLAMA_VOCAB_PRE_TYPE_JAIS = 19,
|
95
|
+
LLAMA_VOCAB_PRE_TYPE_TEKKEN = 20,
|
96
|
+
LLAMA_VOCAB_PRE_TYPE_SMOLLM = 21,
|
97
|
+
LLAMA_VOCAB_PRE_TYPE_CODESHELL = 22,
|
95
98
|
};
|
96
99
|
|
97
100
|
// note: these values should be synchronized with lm_ggml_rope
|
@@ -133,7 +136,7 @@ extern "C" {
|
|
133
136
|
LLAMA_FTYPE_MOSTLY_F16 = 1, // except 1d tensors
|
134
137
|
LLAMA_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors
|
135
138
|
LLAMA_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors
|
136
|
-
LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16
|
139
|
+
// LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16
|
137
140
|
// LLAMA_FTYPE_MOSTLY_Q4_2 = 5, // support has been removed
|
138
141
|
// LLAMA_FTYPE_MOSTLY_Q4_3 = 6, // support has been removed
|
139
142
|
LLAMA_FTYPE_MOSTLY_Q8_0 = 7, // except 1d tensors
|
@@ -411,6 +414,9 @@ extern "C" {
|
|
411
414
|
const char * content;
|
412
415
|
} llama_chat_message;
|
413
416
|
|
417
|
+
// lora adapter
|
418
|
+
struct llama_lora_adapter;
|
419
|
+
|
414
420
|
// Helpers for getting default parameters
|
415
421
|
LLAMA_API struct llama_model_params llama_model_default_params(void);
|
416
422
|
LLAMA_API struct llama_context_params llama_context_default_params(void);
|
@@ -510,18 +516,28 @@ extern "C" {
|
|
510
516
|
const char * fname_out,
|
511
517
|
const llama_model_quantize_params * params);
|
512
518
|
|
513
|
-
//
|
514
|
-
//
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
519
|
+
// Load a LoRA adapter from file
|
520
|
+
// The loaded adapter will be associated to the given model, and will be free when the model is deleted
|
521
|
+
LLAMA_API struct llama_lora_adapter * llama_lora_adapter_init(
|
522
|
+
struct llama_model * model,
|
523
|
+
const char * path_lora);
|
524
|
+
|
525
|
+
// Add a loaded LoRA adapter to given context
|
526
|
+
// This will not modify model's weight
|
527
|
+
LLAMA_API int32_t llama_lora_adapter_set(
|
528
|
+
struct llama_context * ctx,
|
529
|
+
struct llama_lora_adapter * adapter,
|
530
|
+
float scale);
|
531
|
+
|
532
|
+
// Remove a LoRA adapter from given context
|
533
|
+
// Return -1 if the adapter is not present in the context
|
534
|
+
LLAMA_API int32_t llama_lora_adapter_remove(
|
535
|
+
struct llama_context * ctx,
|
536
|
+
struct llama_lora_adapter * adapter);
|
537
|
+
|
538
|
+
// Manually free a LoRA adapter
|
539
|
+
// Note: loaded adapters will be free when the associated model is deleted
|
540
|
+
LLAMA_API void llama_lora_adapter_free(struct llama_lora_adapter * adapter);
|
525
541
|
|
526
542
|
// Apply a loaded control vector to a llama_context, or if data is NULL, clear
|
527
543
|
// the currently loaded vector.
|