llama_cpp 0.0.1 → 0.0.3

data/ext/llama_cpp/src/llama.h CHANGED
@@ -6,7 +6,7 @@
  #include <stdbool.h>
 
  #ifdef LLAMA_SHARED
- # ifdef _WIN32
+ # if defined(_WIN32) && !defined(__MINGW32__)
  # ifdef LLAMA_BUILD
  # define LLAMA_API __declspec(dllexport)
  # else
@@ -20,7 +20,7 @@
  #endif
 
  #define LLAMA_FILE_VERSION 1
- #define LLAMA_FILE_MAGIC 0x67676d66 // 'ggmf' in hex
+ #define LLAMA_FILE_MAGIC 0x67676a74 // 'ggjt' in hex
  #define LLAMA_FILE_MAGIC_UNVERSIONED 0x67676d6c // pre-versioned files
 
  #ifdef __cplusplus
@@ -83,6 +83,23 @@ extern "C" {
      const char * fname_out,
      int itype);
 
+ // Returns the KV cache that will contain the context for the
+ // ongoing prediction with the model.
+ LLAMA_API const uint8_t * llama_get_kv_cache(struct llama_context * ctx);
+
+ // Returns the size of the KV cache
+ LLAMA_API size_t llama_get_kv_cache_size(struct llama_context * ctx);
+
+ // Returns the number of tokens in the KV cache
+ LLAMA_API int llama_get_kv_cache_token_count(struct llama_context * ctx);
+
+ // Sets the KV cache containing the current context for the model
+ LLAMA_API void llama_set_kv_cache(
+     struct llama_context * ctx,
+     const uint8_t * kv_cache,
+     size_t n_size,
+     int n_token_count);
+
  // Run the llama inference to obtain the logits and probabilities for the next token.
  // tokens + n_tokens is the provided batch of new tokens to process
  // n_past is the number of tokens to use from previous eval calls
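Together, these four calls let a host application snapshot the KV cache (the per-token attention state) and restore it later, for example to resume a conversation without re-evaluating the whole prompt. The Ruby bindings in this release do not wrap them yet (the RBS file added below lists no KV-cache methods), but Context#eval wraps llama_eval, and its n_past argument is exactly the token count the cache tracks. A minimal sketch of incremental evaluation through the Ruby API, assuming a placeholder model path:

    require 'llama_cpp'

    context = LLaMACpp::Context.new(model_path: 'path/to/model.bin', # placeholder path
                                    params: LLaMACpp::ContextParams.new)

    # The first eval processes the whole prompt; its tokens fill the KV cache.
    prompt = context.tokenize(text: ' Hello there', add_bos: true)
    context.eval(tokens: prompt, n_past: 0)

    # Later evals feed only the new tokens; n_past tells llama.cpp how many
    # tokens of context already sit in the KV cache from previous calls.
    more = context.tokenize(text: ', General Kenobi')
    context.eval(tokens: more, n_past: prompt.size)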
data/lib/llama_cpp/version.rb CHANGED
@@ -3,8 +3,8 @@
  # llama_cpp.rb provides Ruby bindings for the llama.cpp.
  module LLaMACpp
    # The version of llama_cpp.rb you install.
-   VERSION = '0.0.1'
+   VERSION = '0.0.3'
 
    # The version of llama.cpp bundled with llama_cpp.rb.
-   LLAMA_CPP_VERSION = 'master-2a98bc1'
+   LLAMA_CPP_VERSION = 'master-698f7b5'
  end
data/lib/llama_cpp.rb CHANGED
@@ -5,14 +5,18 @@ require_relative 'llama_cpp/llama_cpp'
 
  # llama_cpp.rb provides Ruby bindings for the llama.cpp.
  module LLaMACpp
+   # Class alias to match interface of whispercpp gem.
+   Params = ContextParams
+
    module_function
 
    # Generates sentences following the given prompt for operation check.
    #
    # @param context [LLaMACpp::Context]
    # @param prompt [String]
+   # @param n_threads [Integer]
    # @return [String]
-   def generate(context, prompt) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/PerceivedComplexity
+   def generate(context, prompt, n_threads: 1) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/PerceivedComplexity
      prompt.insert(0, ' ')
 
      embd_input = context.tokenize(text: prompt, add_bos: true)
@@ -36,7 +40,7 @@ module LLaMACpp
        embd.insert(0, last_n_tokens[(n_ctx - (n_left / 2) - embd.size)...-embd.size])
      end
 
-     context.eval(tokens: embd, n_past: n_past)
+     context.eval(tokens: embd, n_past: n_past, n_threads: n_threads)
    end
 
    n_past += embd.size
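The new n_threads keyword defaults to 1, so existing callers are unaffected. A quick check of the updated signature, using the Params alias introduced above; the model path is a placeholder:

    require 'llama_cpp'

    params = LLaMACpp::Params.new # alias for ContextParams, mirroring the whispercpp gem
    params.seed = 123

    context = LLaMACpp::Context.new(model_path: 'path/to/model.bin', params: params) # placeholder path
    puts LLaMACpp.generate(context, 'Hello, World.', n_threads: 4)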
data/sig/llama_cpp.rbs ADDED
@@ -0,0 +1,52 @@
+ module LLaMACpp
+   VERSION: String
+   LLAMA_CPP_VERSION: String
+   LLAMA_FILE_VERSION: String
+   LLAMA_FILE_MAGIC: String
+   LLAMA_FILE_MAGIC_UNVERSIONED: String
+
+   def self?.generate: (::LLaMACpp::Context, String, ?n_threads: Integer) -> String
+   def self?.print_system_info: () -> void
+   def self?.token_bos: () -> Integer
+   def self?.token_eos: () -> Integer
+
+   class Context
+     public
+
+     def initialize: (model_path: String, params: ::LLaMACpp::ContextParams) -> void
+     def embeddings: () -> Array[Float]
+     def eval: (tokens: Array[Integer], n_past: Integer, ?n_tokens: Integer, ?n_threads: Integer) -> nil
+     def logits: () -> Array[Float]
+     def n_ctx: () -> Integer
+     def n_embd: () -> Integer
+     def n_vocab: () -> Integer
+     def print_timings: () -> void
+     def reset_timings: () -> void
+     def sample_top_p_top_k: (top_k: Integer, top_p: Float, temp: Float, penalty: Float) -> Integer
+     def token_to_str: (Integer) -> String
+     def tokenize: (text: String, ?n_max_tokens: Integer, ?add_bos: bool) -> Array[Integer]
+   end
+
+   class ContextParams
+     public
+
+     def embedding: () -> bool
+     def embedding=: (bool) -> bool
+     def f16_kv: () -> bool
+     def f16_kv=: (bool) -> bool
+     def logits_all: () -> bool
+     def logits_all=: (bool) -> bool
+     def n_ctx: () -> Integer
+     def n_ctx=: (Integer) -> Integer
+     def n_parts: () -> Integer
+     def n_parts=: (Integer) -> Integer
+     def seed: () -> Integer
+     def seed=: (Integer) -> Integer
+     def use_mlock: () -> bool
+     def use_mlock=: (bool) -> bool
+     def vocab_only: () -> bool
+     def vocab_only=: (bool) -> bool
+   end
+
+   class Params = ContextParams
+ end
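The signatures above are also enough to drive a hand-rolled generation loop without going through LLaMACpp.generate. A minimal sketch; the model path is a placeholder and the sampling constants are illustrative values, not defaults shipped by the gem:

    require 'llama_cpp'

    context = LLaMACpp::Context.new(model_path: 'path/to/model.bin', # placeholder path
                                    params: LLaMACpp::ContextParams.new)

    embd = context.tokenize(text: ' The capital of France is', add_bos: true)
    n_past = 0
    output = +''

    8.times do
      context.eval(tokens: embd, n_past: n_past, n_threads: 2)
      n_past += embd.size
      # Illustrative sampling constants, not gem defaults.
      id = context.sample_top_p_top_k(top_k: 40, top_p: 0.95, temp: 0.8, penalty: 1.1)
      break if id == LLaMACpp.token_eos

      output << context.token_to_str(id)
      embd = [id]
    end

    puts output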
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: llama_cpp
  version: !ruby/object:Gem::Version
-   version: 0.0.1
+   version: 0.0.3
  platform: ruby
  authors:
  - yoshoku
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2023-04-02 00:00:00.000000000 Z
+ date: 2023-04-08 00:00:00.000000000 Z
  dependencies: []
  description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
  email:
@@ -32,6 +32,7 @@ files:
  - ext/llama_cpp/src/llama.h
  - lib/llama_cpp.rb
  - lib/llama_cpp/version.rb
+ - sig/llama_cpp.rbs
  homepage: https://github.com/yoshoku/llama_cpp.rb
  licenses:
  - MIT