rllama 1.0.1-aarch64-linux-musl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/rllama/loader.rb ADDED
@@ -0,0 +1,212 @@
+ # frozen_string_literal: true
+
+ require 'uri'
+ require 'net/http'
+ require 'fileutils'
+
+ module Rllama
+   module Loader
+     HUGGINGFACE_BASE_URL = 'https://huggingface.co'
+     DEFAULT_DIR = File.join(Dir.home, '.rllama')
+
+     UNITS = %w[B KB MB GB TB].freeze
+
+     module_function
+
+     def resolve(path_or_name, dir: nil)
+       dir ||= DEFAULT_DIR
+
+       dir = File.join(dir, 'models')
+
+       return path_or_name if local_file?(path_or_name)
+
+       if url?(path_or_name)
+         download_from_url(path_or_name, dir)
+       elsif huggingface_path?(path_or_name)
+         download_from_huggingface(path_or_name, dir)
+       else
+         raise Error, "Invalid model path or name: #{path_or_name}"
+       end
+     end
+
+     def local_file?(path)
+       File.exist?(path)
+     end
+
+     def url?(path)
+       uri = URI.parse(path)
+
+       uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)
+     rescue URI::InvalidURIError
+       false
+     end
+
+     def huggingface_path?(path)
+       return false if path.start_with?('/') || path.include?('://')
+
+       parts = path.split('/')
+
+       parts.length >= 3 && parts.last.end_with?('.gguf')
+     end
+
+     def download_from_huggingface(hf_path, dir)
+       parts = hf_path.split('/')
+
+       raise Error, "Invalid HuggingFace path: #{hf_path}" if parts.length < 3
+
+       org = parts[0]
+       repo = parts[1]
+       file_path = parts[2..].join('/')
+
+       url = "#{HUGGINGFACE_BASE_URL}/#{org}/#{repo}/resolve/main/#{file_path}"
+
+       local_path = File.join(dir, org, repo, file_path)
+
+       return local_path if File.exist?(local_path)
+
+       puts "Destination: #{local_path}"
+
+       download_file(url, local_path, "HuggingFace model: #{hf_path}")
+     end
+
+     def download_from_url(url, dir)
+       uri = URI.parse(url)
+
+       filename = File.basename(uri.path)
+
+       local_path = File.join(dir, filename)
+
+       return local_path if File.exist?(local_path)
+
+       puts "Destination: #{local_path}"
+
+       download_file(url, local_path, "URL: #{url}")
+     end
+
+     def download_file(url, local_path, description)
+       FileUtils.mkdir_p(File.dirname(local_path))
+
+       temp_path = File.join(File.dirname(local_path), "~#{File.basename(local_path)}")
+
+       existing_size = File.exist?(temp_path) ? File.size(temp_path) : 0
+
+       uri = URI.parse(url)
+
+       Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
+         request = Net::HTTP::Get.new(uri.request_uri)
+
+         request['Range'] = "bytes=#{existing_size}-" if existing_size.positive?
+
+         http.request(request) do |response|
+           case response
+           when Net::HTTPSuccess, Net::HTTPPartialContent
+             if response['Content-Range']
+               total_size = response['Content-Range'].split('/').last.to_i
+             else
+               total_size = response['content-length'].to_i
+
+               if existing_size.positive? && response.code == '200'
+                 puts "\nServer doesn't support resume, starting from beginning..."
+
+                 existing_size = 0
+
+                 FileUtils.rm_f(temp_path)
+               end
+             end
+
+             downloaded = existing_size
+             file_mode = existing_size.positive? ? 'ab' : 'wb'
+
+             File.open(temp_path, file_mode) do |file|
+               response.read_body do |chunk|
+                 file.write(chunk)
+                 downloaded += chunk.size
+
+                 if total_size.positive?
+                   progress = (downloaded.to_f / total_size * 100).round
+                   total_str = format_bytes(total_size)
+                   downloaded_str = format_bytes(downloaded)
+                   padding = total_str.length
+                   formatted_downloaded = format("%#{padding}s", downloaded_str)
+                   print format("\rProgress: %<progress>6d%% (%<downloaded>s / %<total>s)",
+                                progress: progress, downloaded: formatted_downloaded, total: total_str)
+                 else
+                   print "\rDownloaded: #{format_bytes(downloaded)}"
+                 end
+               end
+             end
+
+             unless verify_download(temp_path, total_size)
+               FileUtils.rm_f(temp_path)
+
+               raise Error, 'Download verification failed - file size mismatch'
+             end
+
+             File.rename(temp_path, local_path)
+
+             puts
+           when Net::HTTPRedirection
+             redirect_url = response['location']
+
+             redirect_url = URI.join(url, redirect_url).to_s unless redirect_url.start_with?('http://', 'https://')
+
+             return download_file(redirect_url, local_path, description)
+           when Net::HTTPRequestedRangeNotSatisfiable
+             if File.exist?(temp_path)
+               uri = URI.parse(url)
+
+               Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |check_http|
+                 head_request = Net::HTTP::Head.new(uri.request_uri)
+                 head_response = check_http.request(head_request)
+
+                 if head_response.is_a?(Net::HTTPSuccess)
+                   expected_size = head_response['content-length'].to_i
+                   actual_size = File.size(temp_path)
+
+                   if expected_size.positive? && expected_size == actual_size
+                     File.rename(temp_path, local_path)
+
+                     return local_path
+                   end
+                 end
+               end
+
+               File.delete(temp_path)
+
+               return download_file(url, local_path, description)
+             end
+
+             raise Error, "Range request failed: #{response.code} #{response.message}"
+           else
+             raise Error, "Failed to download model: #{response.code} #{response.message}"
+           end
+         end
+       end
+
+       local_path
+     end
+
+     def verify_download(local_path, expected_size)
+       return true if expected_size <= 0
+
+       actual_size = File.size(local_path)
+       actual_size == expected_size
+     end
+
+     def format_bytes(bytes)
+       return '0 B' if bytes.zero?
+
+       exp = (Math.log(bytes) / Math.log(1024)).floor
+
+       exp = [exp, UNITS.length - 1].min
+
+       value = bytes.to_f / (1024**exp)
+
+       if exp >= 3
+         format('%<val>.2f %<unit>s', val: value, unit: UNITS[exp])
+       else
+         format('%<val>d %<unit>s', val: value.round, unit: UNITS[exp])
+       end
+     end
+   end
+ end
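For orientation, a minimal usage sketch of the resolver above, following its three branches (existing local file, plain URL, and a HuggingFace "org/repo/file.gguf" path); the paths, URL, and repository names are placeholders rather than files shipped with the gem:

  # Existing local file: returned unchanged.
  Rllama::Loader.resolve('/tmp/model.gguf')

  # Plain URL: downloaded with resume support into <dir>/models/<basename>,
  # ~/.rllama/models by default.
  Rllama::Loader.resolve('https://example.com/files/model.gguf')

  # HuggingFace path: fetched from https://huggingface.co/<org>/<repo>/resolve/main/<file>
  # and cached under <dir>/models/<org>/<repo>/<file>.
  Rllama::Loader.resolve('example-org/example-repo/model.gguf')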
data/lib/rllama/model.rb ADDED
@@ -0,0 +1,124 @@
+ # frozen_string_literal: true
+
+ module Rllama
+   class Model
+     DEFAULT_CONTEXT_LENGTH = 2**13
+
+     attr_reader :pointer
+
+     def initialize(path_or_name, dir: nil)
+       resolved_path = Loader.resolve(path_or_name, dir:)
+
+       model_params = Cpp.llama_model_default_params
+
+       @pointer = Cpp.llama_model_load_from_file(resolved_path, model_params)
+
+       raise Error, "Unable to load model from #{resolved_path}" if @pointer.null?
+     end
+
+     def chat_template
+       @chat_template ||= Cpp.llama_model_chat_template(@pointer, nil)
+     end
+
+     def vocab
+       @vocab ||= Cpp.llama_model_get_vocab(@pointer)
+     end
+
+     def n_embd
+       @n_embd ||= Cpp.llama_model_n_embd(@pointer)
+     end
+
+     def n_seq_max
+       @n_seq_max ||= Cpp.llama_max_parallel_sequences
+     end
+
+     def n_ctx_train
+       @n_ctx_train ||= Cpp.llama_model_n_ctx_train(@pointer)
+     end
+
+     def generate(prompt, max_tokens: DEFAULT_CONTEXT_LENGTH, temperature: 0.8, top_k: 40, top_p: 0.95, min_p: 0.05,
+                  seed: nil, system: nil, &block)
+       init_context(n_ctx: max_tokens) do |ctx|
+         ctx.generate(prompt, max_tokens: ctx.n_ctx,
+                      temperature:, top_k:, top_p:, seed:, system:, min_p:,
+                      &block)
+       end
+     end
+     alias message generate
+
+     def embed(prompt, normalize: true, batch_size: 512, &block)
+       inputs = prompt.is_a?(Array) ? prompt : [prompt]
+
+       tokenized_inputs = inputs.map { |text| tokenize(text, max_tokens: n_ctx_train) }
+       max_token_length = tokenized_inputs.map(&:length).max || 0
+
+       effective_batch_size = [batch_size, max_token_length].max
+       effective_ctx = [n_ctx_train, max_token_length].min
+
+       init_embedding_context(n_ctx: effective_ctx, n_batch: effective_batch_size) do |ctx|
+         inputs = prompt.is_a?(Array) ? tokenized_inputs : tokenized_inputs[0]
+
+         ctx.embed(inputs, normalize:, batch_size: effective_batch_size, &block)
+       end
+     end
+
+     def tokenize(text, max_tokens: nil)
+       size = text.bytesize + 2
+
+       tokens_ptr = FFI::MemoryPointer.new(:int32, size)
+       count = Cpp.llama_tokenize(vocab, text, text.bytesize, tokens_ptr, size, true, false)
+
+       raise Error, "Failed to tokenize text: '#{text}'" if count.negative?
+
+       tokens_ptr.read_array_of_int32([count, max_tokens].compact.min)
+     end
+
+     def close
+       Cpp.llama_model_free(@pointer)
+     end
+
+     def init_context(embeddings: false, n_ctx: DEFAULT_CONTEXT_LENGTH, n_batch: 512)
+       context = Context.new(self, embeddings:, n_ctx:, n_batch:)
+
+       if block_given?
+         result = yield context
+
+         context.close
+
+         return result
+       end
+
+       context
+     end
+
+     def init_embedding_context(n_ctx: n_ctx_train, n_batch: 512, &)
+       init_context(embeddings: true, n_ctx:, n_batch:, &)
+     end
+
+     def build_chat_template(messages)
+       raise Error, 'Model does not provide a chat template' if chat_template.nil? || chat_template.empty?
+
+       count = messages.length
+       struct_size = Cpp::LlamaChatMessage.size
+       array_ptr = FFI::MemoryPointer.new(struct_size * count)
+
+       messages.each_with_index do |m, i|
+         struct_ptr = array_ptr + (i * struct_size)
+         msg_struct = Cpp::LlamaChatMessage.new(struct_ptr)
+         msg_struct[:role] = FFI::MemoryPointer.from_string(m[:role].to_s)
+         msg_struct[:content] = FFI::MemoryPointer.from_string(m[:content].to_s)
+       end
+
+       needed = Cpp.llama_chat_apply_template(chat_template, array_ptr, count, true, nil, 0)
+
+       raise Error, 'Failed to apply chat template' if needed.negative?
+
+       buf = FFI::MemoryPointer.new(:char, needed)
+       written = Cpp.llama_chat_apply_template(chat_template, array_ptr, count, true, buf, needed)
+
+       raise Error, 'Failed to apply chat template' if written.negative?
+
+       buf.read_string(written)
+     end
+   end
+ end
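A rough sketch of how the Model API above is meant to be driven; the GGUF path is a placeholder, and the return value of generate is assumed (not confirmed by this file) to be the Result struct with text and stats defined in lib/rllama.rb:

  model = Rllama::Model.new('/path/to/model.gguf')

  # Keyword defaults mirror Model#generate above.
  result = model.generate('Write a haiku about Ruby.', temperature: 0.8)
  puts result.text # assumes Context#generate returns an Rllama::Result

  # Model#embed accepts a single string or an array of strings.
  vector = model.embed('hello world')

  model.close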
data/lib/rllama/version.rb ADDED
@@ -0,0 +1,5 @@
+ # frozen_string_literal: true
+
+ module Rllama
+   VERSION = '1.0.1'
+ end
data/lib/rllama.rb ADDED
@@ -0,0 +1,37 @@
+ # frozen_string_literal: true
+
+ module Rllama
+   autoload :Model, 'rllama/model'
+   autoload :Loader, 'rllama/loader'
+   autoload :Context, 'rllama/context'
+   autoload :Cpp, 'rllama/cpp'
+   autoload :Cli, 'rllama/cli'
+   autoload :VERSION, 'rllama/version'
+
+   Result = Struct.new(:text, :stats, keyword_init: true)
+   Error = Class.new(StandardError)
+
+   module_function
+
+   def load_model(path_or_name, dir: nil)
+     model = Model.new(path_or_name, dir:)
+
+     if block_given?
+       begin
+         yield model
+       ensure
+         model.close
+       end
+     else
+       model
+     end
+   end
+
+   def silence_log!
+     Cpp.silence_log!
+   end
+
+   def set_log(io = $stdout)
+     Cpp.set_log(io)
+   end
+ end
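A hedged sketch of the top-level helpers defined here: load_model resolves or downloads the model via Loader, and the block form closes it automatically on exit; the HuggingFace path is only an illustrative placeholder:

  require 'rllama'

  Rllama.silence_log!

  Rllama.load_model('example-org/example-repo/model.gguf') do |model|
    puts model.message('Hello!').text # .text assumes the Result struct above
  end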
metadata ADDED
@@ -0,0 +1,80 @@
+ --- !ruby/object:Gem::Specification
+ name: rllama
+ version: !ruby/object:Gem::Version
+   version: 1.0.1
+ platform: aarch64-linux-musl
+ authors:
+ - Pete Matsyburka
+ bindir: bin
+ cert_chain: []
+ date: 2025-10-05 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: ffi
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '1.0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '1.0'
+ description: Ruby bindings for Llama.cpp to run local LLMs in Ruby applications.
+ email:
+ - pete@docuseal.com
+ executables:
+ - rllama
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - README.md
+ - bin/rllama
+ - lib/rllama.rb
+ - lib/rllama/aarch64-linux/libggml-base.so
+ - lib/rllama/aarch64-linux/libggml-cpu-alderlake.so
+ - lib/rllama/aarch64-linux/libggml-cpu-haswell.so
+ - lib/rllama/aarch64-linux/libggml-cpu-icelake.so
+ - lib/rllama/aarch64-linux/libggml-cpu-sandybridge.so
+ - lib/rllama/aarch64-linux/libggml-cpu-sapphirerapids.so
+ - lib/rllama/aarch64-linux/libggml-cpu-skylakex.so
+ - lib/rllama/aarch64-linux/libggml-cpu-sse42.so
+ - lib/rllama/aarch64-linux/libggml-cpu-x64.so
+ - lib/rllama/aarch64-linux/libggml-cpu.so
+ - lib/rllama/aarch64-linux/libggml-rpc.so
+ - lib/rllama/aarch64-linux/libggml.so
+ - lib/rllama/aarch64-linux/libllama.so
+ - lib/rllama/cli.rb
+ - lib/rllama/context.rb
+ - lib/rllama/cpp.rb
+ - lib/rllama/loader.rb
+ - lib/rllama/model.rb
+ - lib/rllama/version.rb
+ licenses:
+ - MIT
+ metadata:
+   bug_tracker_uri: https://github.com/docusealco/rllama/issues
+   homepage_uri: https://github.com/docusealco/rllama
+   source_code_uri: https://github.com/docusealco/rllama
+   rubygems_mfa_required: 'true'
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: 3.1.0
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubygems_version: 3.6.2
+ specification_version: 4
+ summary: Ruby bindings for Llama API
+ test_files: []