llama_cpp 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/ext/llama_cpp/extconf.rb +7 -0
- data/ext/llama_cpp/llama_cpp.cpp +60 -6
- data/ext/llama_cpp/src/ggml-cuda.h +2 -0
- data/ext/llama_cpp/src/ggml-opencl.c +246 -133
- data/ext/llama_cpp/src/ggml.c +362 -137
- data/ext/llama_cpp/src/ggml.h +13 -3
- data/ext/llama_cpp/src/llama-util.h +23 -23
- data/ext/llama_cpp/src/llama.cpp +173 -102
- data/ext/llama_cpp/src/llama.h +30 -17
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +2 -0
- data/sig/llama_cpp.rbs +1 -0
- metadata +2 -2
data/ext/llama_cpp/src/llama.h
CHANGED
@@ -19,10 +19,16 @@
 #    define LLAMA_API
 #endif
 
-#define
-#define
-#define
-#define
+#define LLAMA_FILE_MAGIC_GGJT        0x67676a74u // 'ggjt'
+#define LLAMA_FILE_MAGIC_GGLA        0x67676c61u // 'ggla'
+#define LLAMA_FILE_MAGIC_GGMF        0x67676d66u // 'ggmf'
+#define LLAMA_FILE_MAGIC_GGML        0x67676d6cu // 'ggml'
+#define LLAMA_FILE_MAGIC_GGSN        0x6767736eu // 'ggsn'
+
+#define LLAMA_FILE_VERSION           3
+#define LLAMA_FILE_MAGIC             LLAMA_FILE_MAGIC_GGJT
+#define LLAMA_FILE_MAGIC_UNVERSIONED LLAMA_FILE_MAGIC_GGML
+#define LLAMA_SESSION_MAGIC          LLAMA_FILE_MAGIC_GGSN
 #define LLAMA_SESSION_VERSION 1
 
 #ifdef __cplusplus
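The new magic constants are simply the four tag characters of each container format packed into a 32-bit value, and LLAMA_FILE_MAGIC / LLAMA_SESSION_MAGIC are now aliases for two of them. Below is a minimal, purely illustrative sketch (not gem code) of checking a model file's leading four bytes against these values; it assumes a little-endian host so a raw uint32_t read compares directly against the constants.

```cpp
// Illustration only: classify a file by the magic constants from llama.h.
// Assumes a little-endian host; not part of the gem or llama.cpp.
#include <cstdint>
#include <cstdio>

int main(int argc, char ** argv) {
    if (argc < 2) { std::fprintf(stderr, "usage: %s MODEL\n", argv[0]); return 1; }

    std::FILE * f = std::fopen(argv[1], "rb");
    if (!f) { std::perror("fopen"); return 1; }

    uint32_t magic = 0;
    if (std::fread(&magic, sizeof(magic), 1, f) != 1) { std::fclose(f); return 1; }
    std::fclose(f);

    switch (magic) {
        case 0x67676a74u: std::puts("ggjt (current versioned model format)"); break;
        case 0x67676d66u: std::puts("ggmf (older versioned model format)");   break;
        case 0x67676d6cu: std::puts("ggml (unversioned legacy format)");      break;
        case 0x6767736eu: std::puts("ggsn (session/state file)");             break;
        case 0x67676c61u: std::puts("ggla (LoRA adapter file)");              break;
        default:          std::printf("unknown magic: 0x%08x\n", (unsigned) magic);
    }
    return 0;
}
```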
@@ -40,9 +46,9 @@ extern "C" {
     typedef int llama_token;
 
     typedef struct llama_token_data {
-        llama_token id;
-        float logit;
-        float p;
+        llama_token id; // token id
+        float logit;    // log-odds of the token
+        float p;        // probability of the token
     } llama_token_data;
 
     typedef struct llama_token_data_array {
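The new field comments pin down how logit and p relate: p is the probability obtained by normalizing the logits. A short sketch of that normalization follows, a plain softmax over a candidate list; the struct mirrors the header so the example is self-contained, and the helper itself is illustrative rather than part of llama.h.

```cpp
// Sketch: derive p from logit with a softmax over the candidate list.
// token_data mirrors llama_token_data's layout for a self-contained example.
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

struct token_data { int id; float logit; float p; };

void softmax(std::vector<token_data> & cand) {
    float max_logit = cand[0].logit;
    for (const auto & c : cand) max_logit = std::max(max_logit, c.logit);

    float sum = 0.0f;
    for (auto & c : cand) { c.p = std::exp(c.logit - max_logit); sum += c.p; }
    for (auto & c : cand) { c.p /= sum; } // p now sums to 1 across candidates
}

int main() {
    std::vector<token_data> cand = { {1, 2.0f, 0.0f}, {2, 1.0f, 0.0f}, {3, -1.0f, 0.0f} };
    softmax(cand);
    for (const auto & c : cand) std::printf("id=%d logit=%.2f p=%.3f\n", c.id, c.logit, c.p);
    return 0;
}
```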
@@ -73,16 +79,16 @@ extern "C" {
 
     // model file types
     enum llama_ftype {
-        LLAMA_FTYPE_ALL_F32
-        LLAMA_FTYPE_MOSTLY_F16
-        LLAMA_FTYPE_MOSTLY_Q4_0
-        LLAMA_FTYPE_MOSTLY_Q4_1
+        LLAMA_FTYPE_ALL_F32              = 0,
+        LLAMA_FTYPE_MOSTLY_F16           = 1, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q4_0          = 2, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q4_1          = 3, // except 1d tensors
         LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16
-        // LLAMA_FTYPE_MOSTLY_Q4_2
-        // LLAMA_FTYPE_MOSTLY_Q4_3
-        LLAMA_FTYPE_MOSTLY_Q8_0
-        LLAMA_FTYPE_MOSTLY_Q5_0
-        LLAMA_FTYPE_MOSTLY_Q5_1
+        // LLAMA_FTYPE_MOSTLY_Q4_2       = 5, // support has been removed
+        // LLAMA_FTYPE_MOSTLY_Q4_3       = 6, // support has been removed
+        LLAMA_FTYPE_MOSTLY_Q8_0          = 7, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q5_0          = 8, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q5_1          = 9, // except 1d tensors
     };
 
     LLAMA_API struct llama_context_params llama_context_default_params();
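Because the enumerators now carry explicit values, an ftype stored in a model header keeps its meaning even though the Q4_2/Q4_3 slots (5 and 6) are retired. A small, purely illustrative mapping (not gem or llama.cpp code) makes the fixed numbering visible:

```cpp
// Illustration: map an ftype value read from a model header back to a name.
// Values 5 and 6 stay reserved for the removed Q4_2/Q4_3 formats.
#include <cstdio>

static const char * ftype_name(int ftype) {
    switch (ftype) {
        case 0: return "all F32";
        case 1: return "mostly F16";
        case 2: return "mostly Q4_0";
        case 3: return "mostly Q4_1";
        case 4: return "mostly Q4_1, some F16";
        case 7: return "mostly Q8_0";
        case 8: return "mostly Q5_0";
        case 9: return "mostly Q5_1";
        default: return "unknown or removed ftype";
    }
}

int main() {
    std::printf("%d -> %s\n", 5, ftype_name(5)); // a retired slot reports as unknown
    std::printf("%d -> %s\n", 9, ftype_name(9));
    return 0;
}
```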
@@ -90,6 +96,13 @@ extern "C" {
     LLAMA_API bool llama_mmap_supported();
    LLAMA_API bool llama_mlock_supported();
 
+    // TODO: not great API - very likely to change
+    // Initialize the llama + ggml backend
+    // Call once at the start of the program
+    LLAMA_API void llama_init_backend();
+
+    LLAMA_API int64_t llama_time_us();
+
     // Various functions for loading a ggml llama model.
     // Allocate (almost) all memory needed for the model.
     // Return NULL on failure
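The intended call order for the new entry points: llama_init_backend() runs once at program start, before any model is loaded, and llama_time_us() gives a microsecond timestamp that is handy for simple timing. A hedged sketch against the header in this diff follows; llama_init_from_file and llama_free are assumed to come from the same header and are not part of this hunk.

```cpp
// Sketch of caller-side usage of llama_init_backend()/llama_time_us().
// Error handling trimmed; assumes the llama.h shown in this diff.
#include "llama.h"
#include <cstdint>
#include <cstdio>

int main(int argc, char ** argv) {
    if (argc < 2) { std::fprintf(stderr, "usage: %s MODEL\n", argv[0]); return 1; }

    llama_init_backend(); // once, at program start

    const int64_t t0 = llama_time_us();
    llama_context_params params = llama_context_default_params();
    llama_context * ctx = llama_init_from_file(argv[1], params);
    const int64_t t1 = llama_time_us();

    if (ctx == nullptr) {
        std::fprintf(stderr, "failed to load %s\n", argv[1]);
        return 1;
    }
    std::printf("model loaded in %.2f ms\n", (t1 - t0) / 1000.0);

    llama_free(ctx);
    return 0;
}
```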
@@ -138,7 +151,7 @@ extern "C" {
 
     // Set the state reading from the specified address
     // Returns the number of bytes read
-    LLAMA_API size_t llama_set_state_data(struct llama_context * ctx,
+    LLAMA_API size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src);
 
     // Save/load session file
     LLAMA_API bool llama_load_session_file(struct llama_context * ctx, const char * path_session, llama_token * tokens_out, size_t n_token_capacity, size_t * n_token_count_out);
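With the full signature visible, a state save/restore round-trip looks like the sketch below. It assumes llama_get_state_size and llama_copy_state_data are the companion functions in the same header (they are not shown in this hunk) and expects an already initialized context from the caller.

```cpp
// Hedged sketch of a state round-trip using the signature shown above.
// Companion functions are assumed from the same llama.h; not gem code.
#include "llama.h"
#include <cstdint>
#include <vector>

bool snapshot_and_restore(llama_context * ctx) {
    const size_t n = llama_get_state_size(ctx);
    std::vector<uint8_t> buf(n);

    const size_t written = llama_copy_state_data(ctx, buf.data()); // serialize KV cache, RNG, logits
    if (written > n) return false;

    // ... run some throwaway decoding here, then roll the context back ...

    const size_t read = llama_set_state_data(ctx, buf.data());     // note: non-const src in this version
    return read == written;
}
```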
data/lib/llama_cpp/version.rb
CHANGED
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LLaMACpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.1.1'
+  VERSION = '0.1.2'
 
   # The version of llama.cpp bundled with llama_cpp.rb.
-  LLAMA_CPP_VERSION = 'master-
+  LLAMA_CPP_VERSION = 'master-265db98'
 end
data/lib/llama_cpp.rb
CHANGED
data/sig/llama_cpp.rbs
CHANGED
@@ -14,6 +14,7 @@ module LLaMACpp
   LLAMA_FTYPE_MOSTLY_Q5_0: Integer
   LLAMA_FTYPE_MOSTLY_Q5_1: Integer
 
+  def self?.init_backend: () -> void
   def self?.model_quantize: (input_path: String, output_path: String, ftype: Integer, ?n_threads: Integer) -> void
   def self?.generate: (::LLaMACpp::Context, String, ?n_predict: Integer, ?n_threads: Integer) -> String
   def self?.print_system_info: () -> void
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.1.2
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-05-
+date: 2023-05-22 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email: