llama_cpp 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/llama_cpp/src/llama.h CHANGED
@@ -19,10 +19,16 @@
 #    define LLAMA_API
 #endif
 
-#define LLAMA_FILE_VERSION 2
-#define LLAMA_FILE_MAGIC 'ggjt'
-#define LLAMA_FILE_MAGIC_UNVERSIONED 'ggml'
-#define LLAMA_SESSION_MAGIC 'ggsn'
+#define LLAMA_FILE_MAGIC_GGJT        0x67676a74u // 'ggjt'
+#define LLAMA_FILE_MAGIC_GGLA        0x67676c61u // 'ggla'
+#define LLAMA_FILE_MAGIC_GGMF        0x67676d66u // 'ggmf'
+#define LLAMA_FILE_MAGIC_GGML        0x67676d6cu // 'ggml'
+#define LLAMA_FILE_MAGIC_GGSN        0x6767736eu // 'ggsn'
+
+#define LLAMA_FILE_VERSION           3
+#define LLAMA_FILE_MAGIC             LLAMA_FILE_MAGIC_GGJT
+#define LLAMA_FILE_MAGIC_UNVERSIONED LLAMA_FILE_MAGIC_GGML
+#define LLAMA_SESSION_MAGIC          LLAMA_FILE_MAGIC_GGSN
 #define LLAMA_SESSION_VERSION 1
 
 #ifdef __cplusplus
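The old multi-character constants such as 'ggjt' have implementation-defined values in C, so the header now spells each magic out as an explicit unsigned 32-bit literal. A minimal sketch of the encoding, assuming only the constants above (the packing helper is illustrative, not part of llama.h):

    #include <stdint.h>
    #include <stdio.h>

    #define LLAMA_FILE_MAGIC_GGJT 0x67676a74u // 'ggjt'

    // Pack four tag characters MSB-first: 'g','g','j','t' -> 0x67,0x67,0x6a,0x74.
    static uint32_t pack_magic(char a, char b, char c, char d) {
        return ((uint32_t)(unsigned char)a << 24) | ((uint32_t)(unsigned char)b << 16)
             | ((uint32_t)(unsigned char)c <<  8) |  (uint32_t)(unsigned char)d;
    }

    int main(void) {
        printf("%s\n", pack_magic('g', 'g', 'j', 't') == LLAMA_FILE_MAGIC_GGJT
                           ? "match" : "mismatch");
        return 0;
    }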
@@ -40,9 +46,9 @@ extern "C" {
     typedef int llama_token;
 
     typedef struct llama_token_data {
-        llama_token id;  // token id
-        float logit; // log-odds of the token
-        float p;     // probability of the token
+        llama_token id; // token id
+        float logit;    // log-odds of the token
+        float p;        // probability of the token
     } llama_token_data;
 
     typedef struct llama_token_data_array {
@@ -73,16 +79,16 @@ extern "C" {
 
     // model file types
     enum llama_ftype {
-        LLAMA_FTYPE_ALL_F32     = 0,
-        LLAMA_FTYPE_MOSTLY_F16  = 1, // except 1d tensors
-        LLAMA_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors
-        LLAMA_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors
+        LLAMA_FTYPE_ALL_F32              = 0,
+        LLAMA_FTYPE_MOSTLY_F16           = 1, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q4_0          = 2, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q4_1          = 3, // except 1d tensors
         LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16
-        // LLAMA_FTYPE_MOSTLY_Q4_2 = 5, // support has been removed
-        // LLAMA_FTYPE_MOSTLY_Q4_3 (6) support has been removed
-        LLAMA_FTYPE_MOSTLY_Q8_0 = 7, // except 1d tensors
-        LLAMA_FTYPE_MOSTLY_Q5_0 = 8, // except 1d tensors
-        LLAMA_FTYPE_MOSTLY_Q5_1 = 9, // except 1d tensors
+        // LLAMA_FTYPE_MOSTLY_Q4_2       = 5, // support has been removed
+        // LLAMA_FTYPE_MOSTLY_Q4_3       = 6, // support has been removed
+        LLAMA_FTYPE_MOSTLY_Q8_0          = 7, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q5_0          = 8, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q5_1          = 9, // except 1d tensors
     };
 
     LLAMA_API struct llama_context_params llama_context_default_params();
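Values 5 and 6 stay reserved for the removed Q4_2/Q4_3 formats, so the gaps in the numbering are intentional. A hypothetical helper (not part of llama.h) that maps the enum above to printable names, assuming the header is included:

    #include "llama.h"

    // Printable name for a model file type; the retired values 5 and 6
    // fall through to "unknown".
    static const char * llama_ftype_str(enum llama_ftype ftype) {
        switch (ftype) {
            case LLAMA_FTYPE_ALL_F32:              return "all F32";
            case LLAMA_FTYPE_MOSTLY_F16:           return "mostly F16";
            case LLAMA_FTYPE_MOSTLY_Q4_0:          return "mostly Q4_0";
            case LLAMA_FTYPE_MOSTLY_Q4_1:          return "mostly Q4_1";
            case LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16: return "mostly Q4_1, some F16";
            case LLAMA_FTYPE_MOSTLY_Q8_0:          return "mostly Q8_0";
            case LLAMA_FTYPE_MOSTLY_Q5_0:          return "mostly Q5_0";
            case LLAMA_FTYPE_MOSTLY_Q5_1:          return "mostly Q5_1";
            default:                               return "unknown";
        }
    }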
@@ -90,6 +96,13 @@ extern "C" {
     LLAMA_API bool llama_mmap_supported();
     LLAMA_API bool llama_mlock_supported();
 
+    // TODO: not great API - very likely to change
+    // Initialize the llama + ggml backend
+    // Call once at the start of the program
+    LLAMA_API void llama_init_backend();
+
+    LLAMA_API int64_t llama_time_us();
+
     // Various functions for loading a ggml llama model.
     // Allocate (almost) all memory needed for the model.
     // Return NULL on failure
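A minimal sketch of the intended call pattern for the two new entry points, assuming only the declarations above (no model is actually loaded here, just timing):

    #include "llama.h"
    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
        llama_init_backend(); // per the comment above: once, at program start

        const int64_t t0 = llama_time_us();
        // ... load a model / evaluate tokens here ...
        const int64_t t1 = llama_time_us();

        printf("elapsed: %lld us\n", (long long)(t1 - t0));
        return 0;
    }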
@@ -138,7 +151,7 @@ extern "C" {
 
     // Set the state reading from the specified address
     // Returns the number of bytes read
-    LLAMA_API size_t llama_set_state_data(struct llama_context * ctx, const uint8_t * src);
+    LLAMA_API size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src);
 
     // Save/load session file
     LLAMA_API bool llama_load_session_file(struct llama_context * ctx, const char * path_session, llama_token * tokens_out, size_t n_token_capacity, size_t * n_token_count_out);
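The only change here is the dropped const on src. A round-trip sketch, assuming llama_get_state_size() and llama_copy_state_data() from elsewhere in the same header and an already-initialized ctx:

    #include "llama.h"
    #include <stdlib.h>

    // Snapshot the context state into a freshly malloc'd buffer.
    static uint8_t * snapshot_state(struct llama_context * ctx, size_t * n_out) {
        const size_t n = llama_get_state_size(ctx);
        uint8_t * buf = malloc(n);
        if (buf != NULL) {
            *n_out = llama_copy_state_data(ctx, buf);
        }
        return buf;
    }

    // Restore it later; after this change the buffer must be non-const.
    static void restore_state(struct llama_context * ctx, uint8_t * buf) {
        llama_set_state_data(ctx, buf);
    }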
data/lib/llama_cpp/version.rb CHANGED
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LLaMACpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.1.1'
+  VERSION = '0.1.2'
 
   # The version of llama.cpp bundled with llama_cpp.rb.
-  LLAMA_CPP_VERSION = 'master-6986c78'
+  LLAMA_CPP_VERSION = 'master-265db98'
 end
data/lib/llama_cpp.rb CHANGED
@@ -106,3 +106,5 @@ module LLaMACpp
     output.join.delete_prefix(spaced_prompt).strip
   end
 end
+
+LLaMACpp.init_backend
data/sig/llama_cpp.rbs CHANGED
@@ -14,6 +14,7 @@ module LLaMACpp
   LLAMA_FTYPE_MOSTLY_Q5_0: Integer
   LLAMA_FTYPE_MOSTLY_Q5_1: Integer
 
+  def self?.init_backend: () -> void
   def self?.model_quantize: (input_path: String, output_path: String, ftype: Integer, ?n_threads: Integer) -> void
   def self?.generate: (::LLaMACpp::Context, String, ?n_predict: Integer, ?n_threads: Integer) -> String
   def self?.print_system_info: () -> void
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.1.2
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-05-21 00:00:00.000000000 Z
+date: 2023-05-22 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email: