rllama 1.0.1-aarch64-linux-gnu → 1.0.3-aarch64-linux-gnu

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 828251b5eb99195371a26fbb54a9fa7ccce0f96928ba1cd096e72b6548617614
-  data.tar.gz: b6cc86f6b97b330260fc1c7d23b21696cc0157014bd3c1fee6cebbb874b4a269
+  metadata.gz: f19cb41b6197028ad125265d82dc01ca449cc6c48b5179433aefa938f139662b
+  data.tar.gz: 6ae54e060e85ce6abbfc8a22b8e5f6f66180685403516485757147eb3978e542
 SHA512:
-  metadata.gz: de4391b2f2fc9e59be8dd23930aa2992594f3b42664a15cc000f58a5d5b54ec8533ce1aa6e86e4af37080f612d18cb36e268f3ce49bdf7f399bf0eded3579a47
-  data.tar.gz: aade6c526d6c817b05ba82a9c29ccf4d61aa566452dcf4ebca39650447abd02ae933236071bbf08522c5a3aad9d250baafef7eb85fe3edc7b1fd7a41c5d4578a
+  metadata.gz: dd391eaaf38dbf33bb72eb561200c9893fa7645d09948c971763ef8b8a89a1d82328d527c5fa256ae7af42f70b45edff4b2121ba45b65c3b90107aaab91523d5
+  data.tar.gz: 84f9fee026a509fe6e6c149d6d20e72e00bfbdee94e50a999b7dee8ce15d7b56f1dc4015860ce5c2e3aa8e9a643dab89e2826bef1dd1a2c98d24bcf819a27a09
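If you want to check a downloaded artifact against these digests, Ruby's standard `digest` library reproduces both hashes; a minimal sketch, with a hypothetical local file path:

```ruby
require 'digest'

# Hypothetical path to a locally extracted copy of the gem's data.tar.gz.
path = 'rllama-1.0.3-aarch64-linux-gnu/data.tar.gz'

# Digest::SHA256.file / Digest::SHA512.file stream the file from disk
# rather than loading it into memory whole.
puts Digest::SHA256.file(path).hexdigest
puts Digest::SHA512.file(path).hexdigest
```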
data/README.md CHANGED
@@ -1,3 +1,5 @@
+<img width="336" height="212" alt="Logo" src="https://github.com/user-attachments/assets/e27442fb-22d1-44cf-ba3d-f10b24c13652" />
+
 # Rllama
 
 Ruby bindings for [llama.cpp](https://github.com/ggerganov/llama.cpp) to run open-source language models locally. Run models like GPT-OSS, Qwen 3, Gemma 3, Llama 3, and many others directly in your Ruby application code.
@@ -22,6 +24,38 @@ Or install it yourself as:
 gem install rllama
 ```
 
+## CLI Chat
+
+The `rllama` command-line utility provides an interactive chat interface for conversing with language models. After installing the gem, you can start chatting immediately:
+
+```bash
+rllama
+```
+
+When you run `rllama` without arguments, it will display:
+
+- **Downloaded models**: Any models you've already downloaded to `~/.rllama/models/`
+- **Popular models**: A curated list of popular models available for download, including:
+  - Gemma 3 1B
+  - Llama 3.2 3B
+  - Phi-4
+  - Qwen3 30B
+  - GPT-OSS
+
+Simply enter the number of the model you want to use. If you select a model that hasn't been downloaded yet, it will be automatically downloaded from Hugging Face.
+
+You can also specify a model path or URL directly:
+
+```bash
+rllama path/to/your/model.gguf
+```
+
+```bash
+rllama https://huggingface.co/microsoft/phi-4-gguf/resolve/main/phi-4-Q3_K_S.gguf
+```
+
+Once the model has loaded, you can start chatting.
+
 ## Usage
 
 ### Text Generation
@@ -162,43 +196,8 @@ By default, embedding vectors are normalized. You can disable normalization with
 ```ruby
 # Generate unnormalized embeddings
 embedding = model.embed('Sample text', normalize: false)
-
-# Use custom batch size for processing multiple texts
-embeddings = model.embed(
-  ['roses are red', 'violets are blue', 'sugar is sweet'],
-  normalize: true
-)
-```
-
-## CLI Chat Utility
-
-The `rllama` command-line utility provides an interactive chat interface for conversing with language models. After installing the gem, you can start chatting immediately:
-
-```bash
-rllama
-```
-
-When you run `rllama` without arguments, it will display:
-
-- **Downloaded models**: Any models you've already downloaded to `~/.rllama/models/`
-- **Popular models**: A curated list of popular models available for download, including:
-  - Gemma 3 1B
-  - Llama 3.2 3B
-  - Phi-4
-  - Qwen3 30B
-  - GPT-OSS
-  - And more...
-
-Simply enter the number of the model you want to use. If you select a model that hasn't been downloaded yet, it will be automatically downloaded from Hugging Face.
-
-You can also specify a model path directly:
-
-```bash
-rllama path/to/your/model.gguf
 ```
 
-Once the model loads, you can start chatting.
-
 ## Finding Models
 
 You can download GGUF format models from various sources:
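The `embed` call shown in this hunk accepts either a single string or an array of strings. A minimal usage sketch reconstructed from the README text above, assuming `model` is an already-loaded Rllama model object (the loading call itself is outside this diff):

```ruby
# `model` is assumed to be an already-loaded Rllama model; loading is
# not shown in this diff.

# Single input, skipping the default normalization:
embedding = model.embed('Sample text', normalize: false)

# Array input returns one embedding vector per string:
embeddings = model.embed(
  ['roses are red', 'violets are blue', 'sugar is sweet'],
  normalize: true
)
```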
data/lib/rllama/context.rb CHANGED
@@ -1,10 +1,12 @@
 # frozen_string_literal: true
 
+require 'etc'
+
 module Rllama
   class Context
     attr_reader :messages, :n_ctx, :n_batch, :n_past
 
-    def initialize(model, embeddings: false, n_ctx: nil, n_batch: nil)
+    def initialize(model, embeddings: false, n_ctx: nil, n_batch: nil, n_threads: Etc.nprocessors)
       @model = model
       @n_ctx = n_ctx
       @n_batch = n_batch
@@ -15,6 +17,9 @@ module Rllama
       @ctx_params[:n_ctx] = @n_ctx if @n_ctx
       @ctx_params[:n_batch] = @n_batch if @n_batch
 
+      @ctx_params[:n_threads] = n_threads
+      @ctx_params[:n_threads_batch] = n_threads
+
       if @embeddings
         seq_cap = @model.n_seq_max
 
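Taken together, these two hunks let callers control llama.cpp's thread count from Ruby. A minimal usage sketch, assuming `model` is an already-loaded Rllama model object (model loading is outside this diff):

```ruby
require 'etc'

# `model` is assumed to be an already-loaded Rllama model; the loading
# call is not part of this diff. Omitting n_threads now defaults to
# every available core via Etc.nprocessors.
ctx = Rllama::Context.new(model)

# Or pin the thread count explicitly, e.g. to half the cores:
ctx = Rllama::Context.new(model, n_threads: [Etc.nprocessors / 2, 1].max)
```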
data/lib/rllama/cpp.rb CHANGED
@@ -8,7 +8,7 @@ module Rllama
 
   LIB_NAME = 'llama'
 
-  platform =
+  PLATFORM =
     case FFI::Platform::OS
     when 'darwin'
       FFI::Platform::ARCH == 'aarch64' ? 'arm64-darwin' : 'x86_64-darwin'
@@ -28,12 +28,27 @@ module Rllama
       "lib#{LIB_NAME}.so"
     end
 
-  platform_dir = File.join(__dir__, platform)
-  platform_path = File.join(platform_dir, lib_file)
+  PLATFORM_DIR = File.join(__dir__, PLATFORM)
+
+  platform_path = File.join(PLATFORM_DIR, lib_file)
 
   lib_paths = []
+
   lib_paths << platform_path if File.exist?(platform_path)
 
+  ggml_lib_file =
+    case FFI::Platform::OS
+    when 'darwin'
+      'libggml.dylib'
+    when 'windows', 'mingw32'
+      'ggml.dll'
+    else
+      'libggml.so'
+    end
+
+  ggml_platform_path = File.join(PLATFORM_DIR, ggml_lib_file)
+  lib_paths << ggml_platform_path if File.exist?(ggml_platform_path)
+
   lib_paths +=
     case FFI::Platform::OS
     when 'darwin'
@@ -436,6 +451,8 @@ module Rllama
   attach_function :llama_backend_init, [], :void
   attach_function :llama_backend_free, [], :void
   attach_function :llama_numa_init, [:int], :void # ggml_numa_strategy
+  attach_function :ggml_backend_load_all, [], :void
+  attach_function :ggml_backend_load_all_from_path, [:string], :void
 
   # Threadpool
   attach_function :llama_attach_threadpool, %i[llama_context_p ggml_threadpool_t ggml_threadpool_t], :void
@@ -681,10 +698,14 @@ module Rllama
       llama_log_set(@log_callback, nil)
     end
 
-    llama_backend_init
-
     silence_log!
 
+    if File.directory?(PLATFORM_DIR)
+      ggml_backend_load_all_from_path(PLATFORM_DIR)
+    else
+      ggml_backend_load_all
+    end
+
     freeze
   end
 end
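The two new `ggml_backend_*` bindings above use FFI's standard `attach_function` mechanism. As a self-contained illustration of that idiom, here is a sketch that binds `cos(3)` from the system math library, nothing rllama-specific:

```ruby
require 'ffi'

# Standalone demo of the FFI attach_function idiom used in cpp.rb;
# it binds cos(3) from the system math library, not anything in llama.cpp.
module MathDemo
  extend FFI::Library
  ffi_lib 'm' # libm on Linux; resolved through libSystem on macOS
  attach_function :cos, [:double], :double
end

puts MathDemo.cos(0.0) # => 1.0
```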
data/lib/rllama/version.rb CHANGED
@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 
 module Rllama
-  VERSION = '1.0.1'
+  VERSION = '1.0.3'
 end
metadata CHANGED
@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: rllama
 version: !ruby/object:Gem::Version
-  version: 1.0.1
+  version: 1.0.3
 platform: aarch64-linux-gnu
 authors:
 - Pete Matsyburka
 bindir: bin
 cert_chain: []
-date: 2025-10-05 00:00:00.000000000 Z
+date: 2025-10-07 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: ffi