cui-llama.rn 1.0.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cpp/llama.h CHANGED
@@ -40,7 +40,7 @@
  #define LLAMA_FILE_MAGIC_GGSQ 0x67677371u // 'ggsq'

  #define LLAMA_SESSION_MAGIC LLAMA_FILE_MAGIC_GGSN
- #define LLAMA_SESSION_VERSION 6
+ #define LLAMA_SESSION_VERSION 7

  #define LLAMA_STATE_SEQ_MAGIC LLAMA_FILE_MAGIC_GGSQ
  #define LLAMA_STATE_SEQ_VERSION 1
@@ -92,6 +92,9 @@ extern "C" {
  LLAMA_VOCAB_PRE_TYPE_CHATGLM4 = 17,
  LLAMA_VOCAB_PRE_TYPE_VIKING = 18,
  LLAMA_VOCAB_PRE_TYPE_JAIS = 19,
+ LLAMA_VOCAB_PRE_TYPE_TEKKEN = 20,
+ LLAMA_VOCAB_PRE_TYPE_SMOLLM = 21,
+ LLAMA_VOCAB_PRE_TYPE_CODESHELL = 22,
  };

  // note: these values should be synchronized with lm_ggml_rope
@@ -133,7 +136,7 @@ extern "C" {
  LLAMA_FTYPE_MOSTLY_F16 = 1, // except 1d tensors
  LLAMA_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors
  LLAMA_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors
- LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16
+ // LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16
  // LLAMA_FTYPE_MOSTLY_Q4_2 = 5, // support has been removed
  // LLAMA_FTYPE_MOSTLY_Q4_3 = 6, // support has been removed
  LLAMA_FTYPE_MOSTLY_Q8_0 = 7, // except 1d tensors
@@ -411,6 +414,9 @@ extern "C" {
  const char * content;
  } llama_chat_message;

+ // lora adapter
+ struct llama_lora_adapter;
+
  // Helpers for getting default parameters
  LLAMA_API struct llama_model_params llama_model_default_params(void);
  LLAMA_API struct llama_context_params llama_context_default_params(void);
@@ -510,18 +516,28 @@ extern "C" {
  const char * fname_out,
  const llama_model_quantize_params * params);

- // Apply a LoRA adapter to a loaded model
- // path_base_model is the path to a higher quality model to use as a base for
- // the layers modified by the adapter. Can be NULL to use the current loaded model.
- // The model needs to be reloaded before applying a new adapter, otherwise the adapter
- // will be applied on top of the previous one
- // Returns 0 on success
- LLAMA_API int32_t llama_model_apply_lora_from_file(
- const struct llama_model * model,
- const char * path_lora,
- float scale,
- const char * path_base_model,
- int32_t n_threads);
+ // Load a LoRA adapter from file
+ // The loaded adapter will be associated with the given model, and will be freed when the model is deleted
+ LLAMA_API struct llama_lora_adapter * llama_lora_adapter_init(
+ struct llama_model * model,
+ const char * path_lora);
+
+ // Add a loaded LoRA adapter to the given context
+ // This will not modify the model's weights
+ LLAMA_API int32_t llama_lora_adapter_set(
+ struct llama_context * ctx,
+ struct llama_lora_adapter * adapter,
+ float scale);
+
+ // Remove a LoRA adapter from the given context
+ // Returns -1 if the adapter is not present in the context
+ LLAMA_API int32_t llama_lora_adapter_remove(
+ struct llama_context * ctx,
+ struct llama_lora_adapter * adapter);
+
+ // Manually free a LoRA adapter
+ // Note: loaded adapters will be freed when the associated model is deleted
+ LLAMA_API void llama_lora_adapter_free(struct llama_lora_adapter * adapter);

  // Apply a loaded control vector to a llama_context, or if data is NULL, clear
  // the currently loaded vector.
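
For context, here is a minimal sketch of how the new per-context LoRA API might be used from application code. This example is not part of the diff: it assumes the surrounding llama.h entry points (llama_backend_init, llama_load_model_from_file, llama_new_context_with_model, llama_free, llama_free_model), and the file paths are placeholders.

    #include "llama.h"
    #include <stdio.h>

    int main(void) {
        llama_backend_init();

        struct llama_model_params mparams = llama_model_default_params();
        struct llama_model * model = llama_load_model_from_file("model.gguf", mparams);
        if (model == NULL) {
            fprintf(stderr, "failed to load model\n");
            return 1;
        }

        struct llama_context_params cparams = llama_context_default_params();
        struct llama_context * ctx = llama_new_context_with_model(model, cparams);

        // The adapter is associated with the model and freed together with it,
        // so llama_lora_adapter_free is only needed for early cleanup.
        struct llama_lora_adapter * adapter = llama_lora_adapter_init(model, "adapter.gguf");
        if (adapter != NULL) {
            // Attach to this context at full strength; model weights are untouched.
            llama_lora_adapter_set(ctx, adapter, 1.0f);

            // ... run inference with the adapter active ...

            // Detach again; returns -1 if the adapter was never set on ctx.
            llama_lora_adapter_remove(ctx, adapter);
        }

        llama_free(ctx);
        llama_free_model(model); // also frees adapters still associated with the model
        llama_backend_free();
        return 0;
    }

The design change is visible in the removed comments: llama_model_apply_lora_from_file merged the adapter into the model's weights, so switching adapters required reloading the model. The new API keeps adapters as separate objects that can be attached to and detached from individual contexts at runtime without modifying the model.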