llama_cpp 0.14.4 → 0.14.5
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -1
- data/examples/chat.rb +2 -4
- data/ext/llama_cpp/extconf.rb +1 -0
- data/ext/llama_cpp/llama_cpp.cpp +23 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +10 -0
- data/vendor/tmp/llama.cpp/LICENSE +1 -1
- data/vendor/tmp/llama.cpp/Makefile +11 -3
- data/vendor/tmp/llama.cpp/ggml-backend.h +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +7 -3
- data/vendor/tmp/llama.cpp/ggml-quants.c +155 -155
- data/vendor/tmp/llama.cpp/ggml-quants.h +82 -82
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +878 -216
- data/vendor/tmp/llama.cpp/ggml.c +8 -8
- data/vendor/tmp/llama.cpp/ggml.h +7 -7
- data/vendor/tmp/llama.cpp/llama.cpp +686 -124
- data/vendor/tmp/llama.cpp/llama.h +81 -13
- metadata +2 -2
data/vendor/tmp/llama.cpp/llama.h CHANGED
@@ -37,10 +37,14 @@
 
 #define LLAMA_FILE_MAGIC_GGLA 0x67676c61u // 'ggla'
 #define LLAMA_FILE_MAGIC_GGSN 0x6767736eu // 'ggsn'
+#define LLAMA_FILE_MAGIC_GGSQ 0x67677371u // 'ggsq'
 
 #define LLAMA_SESSION_MAGIC   LLAMA_FILE_MAGIC_GGSN
 #define LLAMA_SESSION_VERSION 5
 
+#define LLAMA_STATE_SEQ_MAGIC   LLAMA_FILE_MAGIC_GGSQ
+#define LLAMA_STATE_SEQ_VERSION 1
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -523,6 +527,7 @@ extern "C" {
             struct llama_context * ctx);
 
     // Removes all tokens that belong to the specified sequence and have positions in [p0, p1)
+    // Returns false if a partial sequence cannot be removed. Removing a whole sequence never fails
     // seq_id < 0 : match any sequence
     // p0 < 0     : [0, p1]
     // p1 < 0     : [p0, inf)
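For illustration, a caller can act on the boolean result this new comment documents. A minimal sketch with a hypothetical helper, assuming the documented function is `llama_kv_cache_seq_rm` from this header and that `ctx` already exists; the sequence id and positions are arbitrary:

```c
#include <llama.h>

// Try to drop positions [32, inf) of sequence 0; if a partial removal
// fails, fall back to clearing the whole sequence, which never fails.
static void trim_sequence_tail(struct llama_context * ctx) {
    if (!llama_kv_cache_seq_rm(ctx, 0, 32, -1)) {
        llama_kv_cache_seq_rm(ctx, 0, -1, -1);
    }
}
```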
@@ -594,34 +599,92 @@ extern "C" {
 
     // Returns the maximum size in bytes of the state (rng, logits, embedding
     // and kv_cache) - will often be smaller after compacting tokens
-    LLAMA_API size_t llama_get_state_size(const struct llama_context * ctx);
+    LLAMA_API size_t llama_state_get_size(const struct llama_context * ctx);
+    LLAMA_API DEPRECATED(size_t llama_get_state_size(const struct llama_context * ctx),
+        "use llama_state_get_size instead");
 
     // Copies the state to the specified destination address.
     // Destination needs to have allocated enough memory.
     // Returns the number of bytes copied
-    LLAMA_API size_t llama_copy_state_data(
+    LLAMA_API size_t llama_state_get_data(
             struct llama_context * ctx,
                          uint8_t * dst);
+    LLAMA_API DEPRECATED(size_t llama_copy_state_data(
+            struct llama_context * ctx,
+                         uint8_t * dst),
+        "use llama_state_get_data instead");
 
     // Set the state reading from the specified address
     // Returns the number of bytes read
-    LLAMA_API size_t llama_set_state_data(
+    LLAMA_API size_t llama_state_set_data(
             struct llama_context * ctx,
                    const uint8_t * src);
+    LLAMA_API DEPRECATED(size_t llama_set_state_data(
+            struct llama_context * ctx,
+                   const uint8_t * src),
+        "use llama_state_set_data instead");
 
     // Save/load session file
-    LLAMA_API bool llama_load_session_file(
+    LLAMA_API bool llama_state_load_file(
             struct llama_context * ctx,
                       const char * path_session,
                      llama_token * tokens_out,
                           size_t   n_token_capacity,
                           size_t * n_token_count_out);
+    LLAMA_API DEPRECATED(bool llama_load_session_file(
+            struct llama_context * ctx,
+                      const char * path_session,
+                     llama_token * tokens_out,
+                          size_t   n_token_capacity,
+                          size_t * n_token_count_out),
+        "use llama_state_load_file instead");
 
-    LLAMA_API bool llama_save_session_file(
+    LLAMA_API bool llama_state_save_file(
             struct llama_context * ctx,
                       const char * path_session,
                const llama_token * tokens,
                           size_t   n_token_count);
+    LLAMA_API DEPRECATED(bool llama_save_session_file(
+            struct llama_context * ctx,
+                      const char * path_session,
+               const llama_token * tokens,
+                          size_t   n_token_count),
+        "use llama_state_save_file instead");
+
+    // Get the exact size needed to copy the KV cache of a single sequence
+    LLAMA_API size_t llama_state_seq_get_size(
+            struct llama_context * ctx,
+                    llama_seq_id   seq_id);
+
+    // Copy the KV cache of a single sequence into the specified buffer
+    LLAMA_API size_t llama_state_seq_get_data(
+            struct llama_context * ctx,
+                         uint8_t * dst,
+                    llama_seq_id   seq_id);
+
+    // Copy the sequence data (originally copied with `llama_state_seq_get_data`) into the specified sequence
+    // Returns:
+    //  - Positive: Ok
+    //  - Zero: Failed to load
+    LLAMA_API size_t llama_state_seq_set_data(
+            struct llama_context * ctx,
+                   const uint8_t * src,
+                    llama_seq_id   dest_seq_id);
+
+    LLAMA_API size_t llama_state_seq_save_file(
+            struct llama_context * ctx,
+                      const char * filepath,
+                    llama_seq_id   seq_id,
+               const llama_token * tokens,
+                          size_t   n_token_count);
+
+    LLAMA_API size_t llama_state_seq_load_file(
+            struct llama_context * ctx,
+                      const char * filepath,
+                    llama_seq_id   dest_seq_id,
+                     llama_token * tokens_out,
+                          size_t   n_token_capacity,
+                          size_t * n_token_count_out);
 
     //
     // Decoding
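The renamed functions keep their old semantics, so migration is a name swap. A minimal sketch of both the whole-context round trip and the new per-sequence variants, using hypothetical helpers and assuming an already-initialized `ctx`; sequence ids 0 and 1 are arbitrary:

```c
#include <stdint.h>
#include <stdlib.h>
#include <llama.h>

// Snapshot the full context state into a buffer, then restore it.
static int state_roundtrip(struct llama_context * ctx) {
    const size_t cap = llama_state_get_size(ctx); // upper bound on state size
    uint8_t * buf = malloc(cap);
    if (buf == NULL) return -1;

    const size_t written = llama_state_get_data(ctx, buf); // may be < cap
    const size_t read    = llama_state_set_data(ctx, buf); // restore
    free(buf);
    return written == read ? 0 : -1;
}

// Copy the KV cache of sequence 0 into sequence 1 via the new seq API.
static int clone_sequence(struct llama_context * ctx) {
    const size_t cap = llama_state_seq_get_size(ctx, 0);
    uint8_t * buf = malloc(cap);
    if (buf == NULL) return -1;

    llama_state_seq_get_data(ctx, buf, 0);
    const size_t ok = llama_state_seq_set_data(ctx, buf, 1); // zero on failure
    free(buf);
    return ok > 0 ? 0 : -1;
}
```

The file-based variants (`llama_state_save_file`/`llama_state_load_file` and their `_seq_` counterparts) follow the same pattern, adding a file path plus the prompt tokens that produced the state.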
@@ -684,8 +747,9 @@ extern "C" {
     // Cols: n_vocab
     LLAMA_API float * llama_get_logits(struct llama_context * ctx);
 
-    // Logits for the ith token. Equivalent to:
+    // Logits for the ith token. For positive indices, equivalent to:
     // llama_get_logits(ctx) + ctx->output_ids[i]*n_vocab
+    // Negative indices can be used to access logits in reverse order, -1 is the last logit.
     // returns NULL for invalid ids.
     LLAMA_API float * llama_get_logits_ith(struct llama_context * ctx, int32_t i);
 
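Assuming `llama_decode` has already produced output for `ctx`, the negative-index form saves the caller from tracking batch sizes; a sketch with a hypothetical helper:

```c
#include <llama.h>

// Logits of the most recently emitted token: -1 is the last logit row.
// Returns NULL for invalid indices.
static const float * last_token_logits(struct llama_context * ctx) {
    return llama_get_logits_ith(ctx, -1);
}
```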
@@ -697,8 +761,9 @@ extern "C" {
     // Otherwise, returns NULL.
     LLAMA_API float * llama_get_embeddings(struct llama_context * ctx);
 
-    // Get the embeddings for the ith token. Equivalent to:
+    // Get the embeddings for the ith token. For positive indices, equivalent to:
     // llama_get_embeddings(ctx) + ctx->output_ids[i]*n_embd
+    // Negative indices can be used to access embeddings in reverse order, -1 is the last embedding.
     // shape: [n_embd] (1-dimensional)
     // returns NULL for invalid ids.
     LLAMA_API float * llama_get_embeddings_ith(struct llama_context * ctx, int32_t i);
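The same convention applies to embeddings; a sketch under the same assumptions:

```c
#include <llama.h>

// Embedding of the last token (shape [n_embd]), or NULL if invalid.
static const float * last_token_embedding(struct llama_context * ctx) {
    return llama_get_embeddings_ith(ctx, -1);
}
```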
@@ -721,6 +786,8 @@ extern "C" {
     // Special tokens
     LLAMA_API llama_token llama_token_bos(const struct llama_model * model); // beginning-of-sentence
     LLAMA_API llama_token llama_token_eos(const struct llama_model * model); // end-of-sentence
+    LLAMA_API llama_token llama_token_cls(const struct llama_model * model); // classification
+    LLAMA_API llama_token llama_token_sep(const struct llama_model * model); // sentence separator
     LLAMA_API llama_token llama_token_nl (const struct llama_model * model); // next-line
 
     // Returns -1 if unknown, 1 for true or 0 for false.
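These accessors suit encoder-style (BERT-like) vocabularies. A hypothetical sketch framing pre-tokenized text as `[CLS] text [SEP]`; whether a given model actually defines these tokens depends on its vocabulary:

```c
#include <llama.h>

// Wrap n_tokens tokens as: [CLS] tokens... [SEP].
// out must have room for n_tokens + 2 entries.
static int frame_for_classification(const struct llama_model * model,
                                    const llama_token * tokens, int n_tokens,
                                    llama_token * out) {
    int n = 0;
    out[n++] = llama_token_cls(model);
    for (int i = 0; i < n_tokens; i++) {
        out[n++] = tokens[i];
    }
    out[n++] = llama_token_sep(model);
    return n;
}
```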
@@ -743,16 +810,16 @@ extern "C" {
     /// @param tokens The tokens pointer must be large enough to hold the resulting tokens.
     /// @return Returns the number of tokens on success, no more than n_tokens_max
     /// @return Returns a negative number on failure - the number of tokens that would have been returned
-    /// @param
-    ///
+    /// @param parse_special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated
+    ///                      as plaintext. Does not insert a leading space.
     LLAMA_API int32_t llama_tokenize(
         const struct llama_model * model,
                       const char * text,
                          int32_t   text_len,
                      llama_token * tokens,
                          int32_t   n_tokens_max,
-                            bool   add_bos,
-                            bool   special);
+                            bool   add_special,
+                            bool   parse_special);
 
     // Token Id -> Piece.
     // Uses the vocabulary in the provided context.
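A sketch of the renamed flags in use (the helper and its buffer handling are assumptions of the example): `add_special` lets the model add BOS/EOS tokens as it is configured to, while `parse_special` controls whether special-token text embedded in the input is parsed or left as plain text:

```c
#include <stdbool.h>
#include <string.h>
#include <llama.h>

// Tokenize a user prompt: add BOS/EOS per model config, but do not
// interpret special-token strings embedded in the text.
static int32_t tokenize_prompt(const struct llama_model * model,
                               const char * text,
                               llama_token * tokens, int32_t n_max) {
    return llama_tokenize(model, text, (int32_t) strlen(text),
                          tokens, n_max,
                          /*add_special =*/ true,
                          /*parse_special =*/ false);
    // a negative result means the buffer was too small (-result needed)
}
```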
@@ -1030,10 +1097,11 @@ const std::vector<std::pair<std::string, struct ggml_tensor *>> & llama_internal
         struct llama_context * ctx
 );
 
-std::vector<std::vector<const llama_grammar_element *>> llama_grammar_accept(
+void llama_grammar_accept(
         const std::vector<std::vector<llama_grammar_element>>         & rules,
         const std::vector<std::vector<const llama_grammar_element *>> & stacks,
-        const uint32_t chr);
+        const uint32_t chr,
+        std::vector<std::vector<const llama_grammar_element *>>       & new_stacks);
 
 std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
         const std::string & src,
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.14.4
+  version: 0.14.5
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2024-04-
+date: 2024-04-13 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email: