llama_cpp 0.0.1 → 0.0.3

data/ext/llama_cpp/src/llama.h CHANGED
@@ -6,7 +6,7 @@
 #include <stdbool.h>
 
 #ifdef LLAMA_SHARED
-# ifdef _WIN32
+# if defined(_WIN32) && !defined(__MINGW32__)
 # ifdef LLAMA_BUILD
 # define LLAMA_API __declspec(dllexport)
 # else
@@ -20,7 +20,7 @@
 #endif
 
 #define LLAMA_FILE_VERSION 1
-#define LLAMA_FILE_MAGIC 0x67676d66 // 'ggmf' in hex
+#define LLAMA_FILE_MAGIC 0x67676a74 // 'ggjt' in hex
 #define LLAMA_FILE_MAGIC_UNVERSIONED 0x67676d6c // pre-versioned files
 
 #ifdef __cplusplus
@@ -83,6 +83,23 @@ extern "C" {
         const char * fname_out,
         int itype);
 
+    // Returns the KV cache that will contain the context for the
+    // ongoing prediction with the model.
+    LLAMA_API const uint8_t * llama_get_kv_cache(struct llama_context * ctx);
+
+    // Returns the size of the KV cache
+    LLAMA_API size_t llama_get_kv_cache_size(struct llama_context * ctx);
+
+    // Returns the number of tokens in the KV cache
+    LLAMA_API int llama_get_kv_cache_token_count(struct llama_context * ctx);
+
+    // Sets the KV cache containing the current context for the model
+    LLAMA_API void llama_set_kv_cache(
+            struct llama_context * ctx,
+            const uint8_t * kv_cache,
+            size_t n_size,
+            int n_token_count);
+
     // Run the llama inference to obtain the logits and probabilities for the next token.
     // tokens + n_tokens is the provided batch of new tokens to process
     // n_past is the number of tokens to use from previous eval calls
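The LLAMA_FILE_MAGIC change above switches the expected model format from 'ggmf' to 'ggjt', so models converted for the older layout need to be re-converted before the bundled llama.cpp will load them. A small Ruby sketch for checking which magic a model file carries; the path is a placeholder, and it assumes the header is a raw little-endian uint32 as llama.cpp writes it on typical platforms:

    # Read the first four bytes of a model file and report its magic value.
    def model_magic(path)
      File.open(path, 'rb') { |f| f.read(4) }.unpack1('V')
    end

    case model_magic('/path/to/model.bin')
    when 0x67676a74 then puts 'ggjt: format expected by this release'
    when 0x67676d66 then puts 'ggmf: older versioned format, re-convert the model'
    when 0x67676d6c then puts 'ggml: unversioned pre-release format'
    else puts 'unknown magic'
    end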
data/lib/llama_cpp/version.rb CHANGED
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LLaMACpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.0.1'
+  VERSION = '0.0.3'
 
   # The version of llama.cpp bundled with llama_cpp.rb.
-  LLAMA_CPP_VERSION = 'master-2a98bc1'
+  LLAMA_CPP_VERSION = 'master-698f7b5'
 end
data/lib/llama_cpp.rb CHANGED
@@ -5,14 +5,18 @@ require_relative 'llama_cpp/llama_cpp'
 
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LLaMACpp
+  # Class alias to match interface of whispercpp gem.
+  Params = ContextParams
+
   module_function
 
   # Generates sentences following the given prompt for operation check.
   #
   # @param context [LLaMACpp::Context]
   # @param prompt [String]
+  # @param n_threads [Integer]
   # @return [String]
-  def generate(context, prompt) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/PerceivedComplexity
+  def generate(context, prompt, n_threads: 1) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/PerceivedComplexity
     prompt.insert(0, ' ')
 
     embd_input = context.tokenize(text: prompt, add_bos: true)
@@ -36,7 +40,7 @@ module LLaMACpp
         embd.insert(0, last_n_tokens[(n_ctx - (n_left / 2) - embd.size)...-embd.size])
       end
 
-      context.eval(tokens: embd, n_past: n_past)
+      context.eval(tokens: embd, n_past: n_past, n_threads: n_threads)
     end
 
     n_past += embd.size
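The new n_threads keyword on generate is forwarded to Context#eval and defaults to 1, so existing callers keep single-threaded behaviour. A minimal usage sketch; the model path and thread count are placeholders:

    require 'llama_cpp'

    params = LLaMACpp::ContextParams.new
    context = LLaMACpp::Context.new(model_path: '/path/to/quantized-model.bin', params: params)

    # Run token evaluation on four CPU threads instead of the default single thread.
    puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.', n_threads: 4)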
data/sig/llama_cpp.rbs ADDED
@@ -0,0 +1,52 @@
+module LLaMACpp
+  VERSION: String
+  LLAMA_CPP_VERSION: String
+  LLAMA_FILE_VERSION: String
+  LLAMA_FILE_MAGIC: String
+  LLAMA_FILE_MAGIC_UNVERSIONED: String
+
+  def self?.generate: (::LLaMACpp::Context, String, ?n_threads: Integer) -> String
+  def self?.print_system_info: () -> void
+  def self?.token_bos: () -> Integer
+  def self?.token_eos: () -> Integer
+
+  class Context
+    public
+
+    def initialize: (model_path: String, params: ::LLaMACpp::ContextParams) -> void
+    def embeddings: () -> Array[Float]
+    def eval: (tokens: Array[Integer], n_past: Integer, ?n_tokens: Integer, ?n_threads: Integer) -> Qnil
+    def logits: () -> Array[Float]
+    def n_ctx: () -> Integer
+    def n_embd: () -> Integer
+    def n_vocab: () -> Integer
+    def print_timings: () -> void
+    def reset_timings: () -> void
+    def sample_top_p_top_k: (top_k: Integer, top_p: Float, temp: Float, penalty: Float) -> Integer
+    def token_to_str: (Integer) -> String
+    def tokenize: (text: String, ?n_max_tokens: Integer, ?add_bos: bool) -> Array[Integer]
+  end
+
+  class ContextParams
+    public
+
+    def embedding: () -> bool
+    def embedding=: (bool) -> bool
+    def f16_kv: () -> bool
+    def f16_kv=: (bool) -> bool
+    def logits_all: () -> bool
+    def logits_all=: (bool) -> bool
+    def n_ctx: () -> Integer
+    def n_ctx=: (Integer) -> Integer
+    def n_parts: () -> Integer
+    def n_parts=: (Integer) -> Integer
+    def seed: () -> Integer
+    def seed=: (Integer) -> Integer
+    def use_mlock: () -> bool
+    def use_mlock=: (bool) -> bool
+    def vocab_only: () -> bool
+    def vocab_only=: (bool) -> bool
+  end
+
+  class Params = ContextParams
+end
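The signatures above also cover the lower-level API. A sketch of driving it directly under these signatures; the model path, prompt, and sampling values are illustrative only:

    require 'llama_cpp'

    params = LLaMACpp::ContextParams.new
    params.n_ctx = 512
    params.seed = 12

    context = LLaMACpp::Context.new(model_path: '/path/to/ggjt-model.bin', params: params)

    # Tokenize the prompt (a leading space mirrors LLaMACpp.generate) and evaluate it.
    tokens = context.tokenize(text: ' Hello', add_bos: true)
    context.eval(tokens: tokens, n_past: 0, n_threads: 2)

    # Sample the next token from the evaluated context and print its text.
    id = context.sample_top_p_top_k(top_k: 40, top_p: 0.95, temp: 0.8, penalty: 1.1)
    print context.token_to_str(id)

This is roughly what LLaMACpp.generate does internally, minus the loop and context-window management.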
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.0.1
+  version: 0.0.3
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-04-02 00:00:00.000000000 Z
+date: 2023-04-08 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email:
@@ -32,6 +32,7 @@ files:
 - ext/llama_cpp/src/llama.h
 - lib/llama_cpp.rb
 - lib/llama_cpp/version.rb
+- sig/llama_cpp.rbs
 homepage: https://github.com/yoshoku/llama_cpp.rb
 licenses:
 - MIT