rllama 1.0.1-aarch64-linux-gnu → 1.0.3-aarch64-linux-gnu

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 828251b5eb99195371a26fbb54a9fa7ccce0f96928ba1cd096e72b6548617614
-  data.tar.gz: b6cc86f6b97b330260fc1c7d23b21696cc0157014bd3c1fee6cebbb874b4a269
+  metadata.gz: f19cb41b6197028ad125265d82dc01ca449cc6c48b5179433aefa938f139662b
+  data.tar.gz: 6ae54e060e85ce6abbfc8a22b8e5f6f66180685403516485757147eb3978e542
 SHA512:
-  metadata.gz: de4391b2f2fc9e59be8dd23930aa2992594f3b42664a15cc000f58a5d5b54ec8533ce1aa6e86e4af37080f612d18cb36e268f3ce49bdf7f399bf0eded3579a47
-  data.tar.gz: aade6c526d6c817b05ba82a9c29ccf4d61aa566452dcf4ebca39650447abd02ae933236071bbf08522c5a3aad9d250baafef7eb85fe3edc7b1fd7a41c5d4578a
+  metadata.gz: dd391eaaf38dbf33bb72eb561200c9893fa7645d09948c971763ef8b8a89a1d82328d527c5fa256ae7af42f70b45edff4b2121ba45b65c3b90107aaab91523d5
+  data.tar.gz: 84f9fee026a509fe6e6c149d6d20e72e00bfbdee94e50a999b7dee8ce15d7b56f1dc4015860ce5c2e3aa8e9a643dab89e2826bef1dd1a2c98d24bcf819a27a09
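If you want to check a downloaded artifact against these digests, Ruby's standard `digest` library reproduces both hashes; a minimal sketch, with a hypothetical local file path:

```ruby
require 'digest'

# Hypothetical path to a locally extracted copy of the gem's data.tar.gz.
path = 'rllama-1.0.3-aarch64-linux-gnu/data.tar.gz'

# Digest::SHA256.file / Digest::SHA512.file stream the file from disk
# rather than loading it into memory whole.
puts Digest::SHA256.file(path).hexdigest
puts Digest::SHA512.file(path).hexdigest
```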
data/README.md CHANGED
@@ -1,3 +1,5 @@
+<img width="336" height="212" alt="Logo" src="https://github.com/user-attachments/assets/e27442fb-22d1-44cf-ba3d-f10b24c13652" />
+
 # Rllama
 
 Ruby bindings for [llama.cpp](https://github.com/ggerganov/llama.cpp) to run open-source language models locally. Run models like GPT-OSS, Qwen 3, Gemma 3, Llama 3, and many others directly in your Ruby application code.
@@ -22,6 +24,38 @@ Or install it yourself as:
 gem install rllama
 ```
 
+## CLI Chat
+
+The `rllama` command-line utility provides an interactive chat interface for conversing with language models. After installing the gem, you can start chatting immediately:
+
+```bash
+rllama
+```
+
+When you run `rllama` without arguments, it will display:
+
+- **Downloaded models**: Any models you've already downloaded to `~/.rllama/models/`
+- **Popular models**: A curated list of popular models available for download, including:
+  - Gemma 3 1B
+  - Llama 3.2 3B
+  - Phi-4
+  - Qwen3 30B
+  - GPT-OSS
+
+Simply enter the number of the model you want to use. If you select a model that hasn't been downloaded yet, it will be automatically downloaded from Hugging Face.
+
+You can also specify a model path or URL directly:
+
+```bash
+rllama path/to/your/model.gguf
+```
+
+```bash
+rllama https://huggingface.co/microsoft/phi-4-gguf/resolve/main/phi-4-Q3_K_S.gguf
+```
+
+Once the model has loaded, you can start chatting.
+
 ## Usage
 
 ### Text Generation
@@ -162,43 +196,8 @@ By default, embedding vectors are normalized. You can disable normalization with
 ```ruby
 # Generate unnormalized embeddings
 embedding = model.embed('Sample text', normalize: false)
-
-# Use custom batch size for processing multiple texts
-embeddings = model.embed(
-  ['roses are red', 'violets are blue', 'sugar is sweet'],
-  normalize: true
-)
-```
-
-## CLI Chat Utility
-
-The `rllama` command-line utility provides an interactive chat interface for conversing with language models. After installing the gem, you can start chatting immediately:
-
-```bash
-rllama
-```
-
-When you run `rllama` without arguments, it will display:
-
-- **Downloaded models**: Any models you've already downloaded to `~/.rllama/models/`
-- **Popular models**: A curated list of popular models available for download, including:
-  - Gemma 3 1B
-  - Llama 3.2 3B
-  - Phi-4
-  - Qwen3 30B
-  - GPT-OSS
-  - And more...
-
-Simply enter the number of the model you want to use. If you select a model that hasn't been downloaded yet, it will be automatically downloaded from Hugging Face.
-
-You can also specify a model path directly:
-
-```bash
-rllama path/to/your/model.gguf
 ```
 
-Once the model loads, you can start chatting.
-
 ## Finding Models
 
 You can download GGUF format models from various sources:
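The `embed` call shown in this hunk accepts either a single string or an array of strings. A minimal usage sketch reconstructed from the README text above, assuming `model` is an already-loaded Rllama model object (the loading call itself is outside this diff):

```ruby
# `model` is assumed to be an already-loaded Rllama model; loading is
# not shown in this diff.

# Single input, skipping the default normalization:
embedding = model.embed('Sample text', normalize: false)

# Array input returns one embedding vector per string:
embeddings = model.embed(
  ['roses are red', 'violets are blue', 'sugar is sweet'],
  normalize: true
)
```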
data/lib/rllama/context.rb CHANGED
@@ -1,10 +1,12 @@
 # frozen_string_literal: true
 
+require 'etc'
+
 module Rllama
   class Context
     attr_reader :messages, :n_ctx, :n_batch, :n_past
 
-    def initialize(model, embeddings: false, n_ctx: nil, n_batch: nil)
+    def initialize(model, embeddings: false, n_ctx: nil, n_batch: nil, n_threads: Etc.nprocessors)
       @model = model
       @n_ctx = n_ctx
       @n_batch = n_batch
@@ -15,6 +17,9 @@ module Rllama
       @ctx_params[:n_ctx] = @n_ctx if @n_ctx
       @ctx_params[:n_batch] = @n_batch if @n_batch
 
+      @ctx_params[:n_threads] = n_threads
+      @ctx_params[:n_threads_batch] = n_threads
+
       if @embeddings
         seq_cap = @model.n_seq_max
 
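Taken together, these two hunks let callers control llama.cpp's thread count from Ruby. A minimal usage sketch, assuming `model` is an already-loaded Rllama model object (model loading is outside this diff):

```ruby
require 'etc'

# `model` is assumed to be an already-loaded Rllama model; the loading
# call is not part of this diff. Omitting n_threads now defaults to
# every available core via Etc.nprocessors.
ctx = Rllama::Context.new(model)

# Or pin the thread count explicitly, e.g. to half the cores:
ctx = Rllama::Context.new(model, n_threads: [Etc.nprocessors / 2, 1].max)
```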
data/lib/rllama/cpp.rb CHANGED
@@ -8,7 +8,7 @@ module Rllama
 
   LIB_NAME = 'llama'
 
-  platform =
+  PLATFORM =
     case FFI::Platform::OS
     when 'darwin'
       FFI::Platform::ARCH == 'aarch64' ? 'arm64-darwin' : 'x86_64-darwin'
@@ -28,12 +28,27 @@ module Rllama
       "lib#{LIB_NAME}.so"
     end
 
-  platform_dir = File.join(__dir__, platform)
-  platform_path = File.join(platform_dir, lib_file)
+  PLATFORM_DIR = File.join(__dir__, PLATFORM)
+
+  platform_path = File.join(PLATFORM_DIR, lib_file)
 
   lib_paths = []
+
   lib_paths << platform_path if File.exist?(platform_path)
 
+  ggml_lib_file =
+    case FFI::Platform::OS
+    when 'darwin'
+      'libggml.dylib'
+    when 'windows', 'mingw32'
+      'ggml.dll'
+    else
+      'libggml.so'
+    end
+
+  ggml_platform_path = File.join(PLATFORM_DIR, ggml_lib_file)
+  lib_paths << ggml_platform_path if File.exist?(ggml_platform_path)
+
   lib_paths +=
     case FFI::Platform::OS
     when 'darwin'
@@ -436,6 +451,8 @@ module Rllama
   attach_function :llama_backend_init, [], :void
   attach_function :llama_backend_free, [], :void
   attach_function :llama_numa_init, [:int], :void # ggml_numa_strategy
+  attach_function :ggml_backend_load_all, [], :void
+  attach_function :ggml_backend_load_all_from_path, [:string], :void
 
   # Threadpool
   attach_function :llama_attach_threadpool, %i[llama_context_p ggml_threadpool_t ggml_threadpool_t], :void
@@ -681,10 +698,14 @@ module Rllama
       llama_log_set(@log_callback, nil)
     end
 
-    llama_backend_init
-
     silence_log!
 
+    if File.directory?(PLATFORM_DIR)
+      ggml_backend_load_all_from_path(PLATFORM_DIR)
+    else
+      ggml_backend_load_all
+    end
+
     freeze
   end
 end
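The two new `ggml_backend_*` bindings above use FFI's standard `attach_function` mechanism. As a self-contained illustration of that idiom, here is a sketch that binds `cos(3)` from the system math library, nothing rllama-specific:

```ruby
require 'ffi'

# Standalone demo of the FFI attach_function idiom used in cpp.rb;
# it binds cos(3) from the system math library, not anything in llama.cpp.
module MathDemo
  extend FFI::Library
  ffi_lib 'm' # libm on Linux; resolved through libSystem on macOS
  attach_function :cos, [:double], :double
end

puts MathDemo.cos(0.0) # => 1.0
```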
data/lib/rllama/version.rb CHANGED
@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 
 module Rllama
-  VERSION = '1.0.1'
+  VERSION = '1.0.3'
 end
metadata CHANGED
@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: rllama
 version: !ruby/object:Gem::Version
-  version: 1.0.1
+  version: 1.0.3
 platform: aarch64-linux-gnu
 authors:
 - Pete Matsyburka
 bindir: bin
 cert_chain: []
-date: 2025-10-05 00:00:00.000000000 Z
+date: 2025-10-07 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: ffi