rllama 1.0.0-x86_64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,210 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'uri'
4
+ require 'net/http'
5
+ require 'fileutils'
6
+
7
+ module Rllama
8
+ module Loader
9
+ HUGGINGFACE_BASE_URL = 'https://huggingface.co'
10
+ DEFAULT_DIR = File.join(Dir.home, '.rllama')
11
+
12
+ UNITS = %w[B KB MB GB TB].freeze
13
+
14
+ module_function
15
+
16
# Resolve a model reference to a local file path.
#
# Accepts an existing local path (returned untouched), an HTTP(S) URL,
# or a HuggingFace "org/repo/file.gguf" spec; remote models are
# downloaded into `<dir>/models`.
#
# @param path_or_name [String] local path, URL, or HuggingFace spec
# @param dir [String, nil] cache root; defaults to DEFAULT_DIR
# @return [String] path to a model file on disk
# @raise [Error] when the reference matches none of the known forms
def resolve(path_or_name, dir: nil)
  return path_or_name if local_file?(path_or_name)

  models_dir = File.join(dir || DEFAULT_DIR, 'models')

  return download_from_url(path_or_name, models_dir) if url?(path_or_name)
  return download_from_huggingface(path_or_name, models_dir) if huggingface_path?(path_or_name)

  raise Error, "Invalid model path or name: #{path_or_name}"
end
31
+
32
# True when +path+ names an existing regular file on disk.
#
# Uses File.file? rather than File.exist?: a directory that happens to
# match the given name must not be treated as a loadable model file
# (resolve would otherwise return a directory path to the loader).
def local_file?(path)
  File.file?(path)
end
35
+
36
# True when +path+ parses as an http:// or https:// URL.
# Invalid URI syntax simply yields false.
def url?(path)
  parsed = URI.parse(path)

  # URI::HTTPS is a subclass of URI::HTTP, so one check covers both.
  parsed.is_a?(URI::HTTP)
rescue URI::InvalidURIError
  false
end
43
+
44
# True for "org/repo/.../file.gguf" specs: a relative path with at
# least three slash-separated segments whose final segment is a
# .gguf file. Absolute paths and anything containing "://" are
# rejected up front.
def huggingface_path?(path)
  return false if path.start_with?('/')
  return false if path.include?('://')

  segments = path.split('/')

  segments.size >= 3 && segments.last.end_with?('.gguf')
end
51
+
52
# Download a model file from HuggingFace into +dir+.
#
# +hf_path+ has the form "org/repo/path/to/file.gguf"; the file is
# fetched from the repository's "main" revision and cached under
# <dir>/<org>/<repo>/<file path>.
#
# @return [String] local path of the downloaded file
# @raise [Error] when the spec has fewer than three segments
def download_from_huggingface(hf_path, dir)
  org, repo, *file_segments = hf_path.split('/')

  raise Error, "Invalid HuggingFace path: #{hf_path}" if file_segments.empty?

  file_path = file_segments.join('/')
  url = "#{HUGGINGFACE_BASE_URL}/#{org}/#{repo}/resolve/main/#{file_path}"
  local_path = File.join(dir, org, repo, file_path)

  puts "Destination: #{local_path}"

  download_file(url, local_path, "HuggingFace model: #{hf_path}")
end
69
+
70
# Download a model from an arbitrary URL into +dir+, using the last
# path segment of the URL as the local filename.
#
# @return [String] local path of the downloaded file
def download_from_url(url, dir)
  filename = File.basename(URI.parse(url).path)
  destination = File.join(dir, filename)

  puts "Destination: #{destination}"

  download_file(url, destination, "URL: #{url}")
end
81
+
82
# Download +url+ to +local_path+, resuming a previous partial download
# when possible.
#
# The body is first streamed into a "~"-prefixed temp file next to the
# destination; only after the size check passes is it renamed into
# place, so an interrupted download never leaves a truncated file at
# +local_path+. A leftover temp file is resumed with an HTTP Range
# request. Progress is printed to stdout.
#
# @param url [String] source URL (http or https)
# @param local_path [String] final destination path
# @param description [String] human-readable label, reused on retries
# @return [String] +local_path+
# @raise [Error] on HTTP failure or when the downloaded size does not
#   match the size announced by the server
def download_file(url, local_path, description)
  FileUtils.mkdir_p(File.dirname(local_path))

  # Already fully downloaded on a previous run.
  return local_path if File.exist?(local_path)

  temp_path = File.join(File.dirname(local_path), "~#{File.basename(local_path)}")

  existing_size = File.exist?(temp_path) ? File.size(temp_path) : 0

  uri = URI.parse(url)

  Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
    request = Net::HTTP::Get.new(uri.request_uri)

    # Ask the server to continue where the previous attempt stopped.
    request['Range'] = "bytes=#{existing_size}-" if existing_size.positive?

    http.request(request) do |response|
      case response
      when Net::HTTPSuccess, Net::HTTPPartialContent
        if response['Content-Range']
          # Resumed download (206): Content-Range is "bytes a-b/total",
          # so the full size is the part after the slash.
          total_size = response['Content-Range'].split('/').last.to_i
        else
          total_size = response['content-length'].to_i

          # A plain 200 despite our Range header means the server
          # ignored it — discard the partial file and start over.
          if existing_size.positive? && response.code == '200'
            puts "\nServer doesn't support resume, starting from beginning..."

            existing_size = 0

            FileUtils.rm_f(temp_path)
          end
        end

        downloaded = existing_size
        # Append when resuming, truncate when starting fresh.
        file_mode = existing_size.positive? ? 'ab' : 'wb'

        File.open(temp_path, file_mode) do |file|
          response.read_body do |chunk|
            file.write(chunk)
            downloaded += chunk.size

            if total_size.positive?
              progress = (downloaded.to_f / total_size * 100).round
              total_str = format_bytes(total_size)
              downloaded_str = format_bytes(downloaded)
              # Right-align the downloaded figure to the total's width
              # so the \r-overwritten progress line stays stable.
              padding = total_str.length
              formatted_downloaded = format("%#{padding}s", downloaded_str)
              print format("\rProgress: %<progress>6d%% (%<downloaded>s / %<total>s)",
                           progress: progress, downloaded: formatted_downloaded, total: total_str)
            else
              # Unknown total (no content-length): show a running count.
              print "\rDownloaded: #{format_bytes(downloaded)}"
            end
          end
        end

        unless verify_download(temp_path, total_size)
          FileUtils.rm_f(temp_path)

          raise Error, 'Download verification failed - file size mismatch'
        end

        # Promote the verified temp file to its final name.
        File.rename(temp_path, local_path)

        puts
      when Net::HTTPRedirection
        redirect_url = response['location']

        # Location may be relative; resolve it against the request URL.
        redirect_url = URI.join(url, redirect_url).to_s unless redirect_url.start_with?('http://', 'https://')

        # NOTE(review): redirects recurse without a depth limit — a
        # redirect loop would recurse until stack exhaustion.
        return download_file(redirect_url, local_path, description)
      when Net::HTTPRequestedRangeNotSatisfiable
        # 416: our Range start is at or past the end of the remote file.
        # Either the temp file is already complete, or it is stale.
        if File.exist?(temp_path)
          uri = URI.parse(url)

          Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |check_http|
            head_request = Net::HTTP::Head.new(uri.request_uri)
            head_response = check_http.request(head_request)

            if head_response.is_a?(Net::HTTPSuccess)
              expected_size = head_response['content-length'].to_i
              actual_size = File.size(temp_path)

              # Temp file already holds the whole payload — promote it.
              if expected_size.positive? && expected_size == actual_size
                File.rename(temp_path, local_path)

                return local_path
              end
            end
          end

          # Temp file is stale or oversized: drop it and retry fresh.
          File.delete(temp_path)

          return download_file(url, local_path, description)
        end

        raise Error, "Range request failed: #{response.code} #{response.message}"
      else
        raise Error, "Failed to download model: #{response.code} #{response.message}"
      end
    end
  end

  local_path
end
186
+
187
# Check a finished download against the size the server reported.
# A non-positive +expected_size+ means the size was unknown, in which
# case the file is accepted as-is.
def verify_download(local_path, expected_size)
  expected_size <= 0 || File.size(local_path) == expected_size
end
193
+
194
# Render a byte count as a human-readable string ("512 B", "1 KB",
# "1.23 GB"). GB and TB get two decimal places; smaller units are
# shown as rounded whole numbers.
def format_bytes(bytes)
  return '0 B' if bytes.zero?

  exponent = [(Math.log(bytes) / Math.log(1024)).floor, UNITS.length - 1].min
  scaled = bytes.to_f / (1024**exponent)

  if exponent >= 3
    format('%<val>.2f %<unit>s', val: scaled, unit: UNITS[exponent])
  else
    format('%<val>d %<unit>s', val: scaled.round, unit: UNITS[exponent])
  end
end
209
+ end
210
+ end
@@ -0,0 +1,103 @@
1
+ # frozen_string_literal: true
2
+
3
module Rllama
  # Wraps a llama.cpp model handle loaded through the FFI bindings in
  # Cpp, providing text generation, embeddings, and chat-template
  # rendering.
  class Model
    # Default context window (8192 tokens) used when none is given.
    DEFAULT_CONTEXT_LENGTH = 2**13

    # Raw FFI pointer to the underlying llama_model.
    attr_reader :pointer

    # Load a model from a local path, URL, or HuggingFace spec
    # (resolved and downloaded by Loader as needed).
    #
    # @raise [Error] if llama.cpp cannot load the resolved file
    def initialize(path_or_name, dir: nil)
      resolved_path = Loader.resolve(path_or_name, dir:)

      model_params = Cpp.llama_model_default_params

      @pointer = Cpp.llama_model_load_from_file(resolved_path, model_params)

      raise Error, "Unable to load model from #{resolved_path}" if @pointer.null?
    end

    # Chat template string shipped with the model, if any (memoized).
    def chat_template
      @chat_template ||= Cpp.llama_model_chat_template(@pointer, nil)
    end

    # Pointer to the model's vocabulary (memoized).
    def vocab
      @vocab ||= Cpp.llama_model_get_vocab(@pointer)
    end

    # Embedding dimensionality of the model (memoized).
    def n_embd
      @n_embd ||= Cpp.llama_model_n_embd(@pointer)
    end

    # Maximum number of parallel sequences supported by llama.cpp.
    def n_seq_max
      @n_seq_max ||= Cpp.llama_max_parallel_sequences
    end

    # Context length the model was trained with (memoized).
    def n_ctx_train
      @n_ctx_train ||= Cpp.llama_model_n_ctx_train(@pointer)
    end

    # Generate a completion for +prompt+ in a fresh context that is
    # closed automatically when generation finishes.
    #
    # @param max_tokens [Integer] context size for the throwaway context
    # @param system [String, nil] optional system prompt
    # @yield streamed tokens, when a block is given
    def generate(prompt, max_tokens: DEFAULT_CONTEXT_LENGTH, temperature: 0.8, top_k: 40, top_p: 0.95, min_p: 0.05,
                 seed: nil, system: nil, &block)
      init_context(n_ctx: max_tokens) do |ctx|
        ctx.generate(prompt, max_tokens: ctx.n_ctx,
                             temperature:, top_k:, top_p:, seed:, system:, min_p:,
                             &block)
      end
    end
    alias message generate

    # Compute embeddings for +prompt+ in a fresh, auto-closed
    # embedding context.
    def embed(prompt, normalize: true, batch_size: 512, &block)
      init_embedding_context do |ctx|
        ctx.embed(prompt, normalize:, batch_size:, &block)
      end
    end

    # Release the native model handle.
    def close
      Cpp.llama_model_free(@pointer)
    end

    # Create a Context for this model.
    #
    # Without a block, returns the open context (caller must close it).
    # With a block, yields the context and guarantees it is closed even
    # if the block raises (the original version leaked the context on
    # an exception), returning the block's value.
    def init_context(embeddings: false, n_ctx: DEFAULT_CONTEXT_LENGTH, n_batch: 512)
      context = Context.new(self, embeddings:, n_ctx:, n_batch:)

      return context unless block_given?

      begin
        yield context
      ensure
        context.close
      end
    end

    # Convenience wrapper: a context configured for embeddings.
    def init_embedding_context(n_ctx: 2048, n_batch: 512, &)
      init_context(embeddings: true, n_ctx:, n_batch:, &)
    end

    # Render +messages+ (array of {role:, content:} hashes) through the
    # model's chat template into a single prompt string.
    #
    # @raise [Error] if the model has no template or templating fails
    def build_chat_template(messages)
      raise Error, 'Model does not provide a chat template' if chat_template.nil? || chat_template.empty?

      count = messages.length
      struct_size = Cpp::LlamaChatMessage.size
      array_ptr = FFI::MemoryPointer.new(struct_size * count)

      # The C structs only store raw char* addresses, so the Ruby-side
      # MemoryPointers must stay referenced until both
      # llama_chat_apply_template calls below have run — otherwise GC
      # may free the strings while the C side is still reading them.
      string_refs = []

      messages.each_with_index do |m, i|
        msg_struct = Cpp::LlamaChatMessage.new(array_ptr + (i * struct_size))
        role_ptr = FFI::MemoryPointer.from_string(m[:role].to_s)
        content_ptr = FFI::MemoryPointer.from_string(m[:content].to_s)
        string_refs << role_ptr << content_ptr
        msg_struct[:role] = role_ptr
        msg_struct[:content] = content_ptr
      end

      # First pass with a NULL buffer returns the required length.
      needed = Cpp.llama_chat_apply_template(chat_template, array_ptr, count, true, nil, 0)

      raise Error, 'Failed to apply chat template' if needed.negative?

      buf = FFI::MemoryPointer.new(:char, needed)
      written = Cpp.llama_chat_apply_template(chat_template, array_ptr, count, true, buf, needed)

      raise Error, 'Failed to apply chat template' if written.negative?

      buf.read_string(written)
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module Rllama
  # Gem version string, kept in sync with the released package version.
  VERSION = '1.0.0'
end
data/lib/rllama.rb ADDED
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
# Top-level namespace for the rllama gem: Ruby bindings around
# llama.cpp for running local LLMs.
module Rllama
  autoload :Model, 'rllama/model'
  autoload :Loader, 'rllama/loader'
  autoload :Context, 'rllama/context'
  autoload :Cpp, 'rllama/cpp'
  autoload :Cli, 'rllama/cli'
  autoload :VERSION, 'rllama/version'

  # Generation result: the produced text plus timing/token statistics.
  Result = Struct.new(:text, :stats, keyword_init: true)

  # Base error class for everything raised by this gem.
  Error = Class.new(StandardError)

  module_function

  # Load a model by path, URL, or HuggingFace spec.
  #
  # Without a block, returns the open Model (caller must close it).
  # With a block, yields the model, guarantees it is closed afterwards,
  # and returns the block's value.
  def load_model(path_or_name, dir: nil)
    model = Model.new(path_or_name, dir:)

    return model unless block_given?

    begin
      yield model
    ensure
      model.close
    end
  end

  # Suppress llama.cpp's native log output.
  def silence_log!
    Cpp.silence_log!
  end

  # Route llama.cpp's native log output to +io+ (defaults to $stdout).
  def set_log(io = $stdout)
    Cpp.set_log(io)
  end
end
metadata ADDED
@@ -0,0 +1,73 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rllama
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: x86_64-darwin
6
+ authors:
7
+ - Pete Matsyburka
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 2025-10-05 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: ffi
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ">="
17
+ - !ruby/object:Gem::Version
18
+ version: '1.0'
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - ">="
24
+ - !ruby/object:Gem::Version
25
+ version: '1.0'
26
+ description: Ruby bindings for Llama.cpp to run local LLMs in Ruby applications.
27
+ email:
28
+ - pete@docuseal.com
29
+ executables:
30
+ - rllama
31
+ extensions: []
32
+ extra_rdoc_files: []
33
+ files:
34
+ - README.md
35
+ - bin/rllama
36
+ - lib/rllama.rb
37
+ - lib/rllama/cli.rb
38
+ - lib/rllama/context.rb
39
+ - lib/rllama/cpp.rb
40
+ - lib/rllama/loader.rb
41
+ - lib/rllama/model.rb
42
+ - lib/rllama/version.rb
43
+ - lib/rllama/x86_64-darwin/libggml-base.dylib
44
+ - lib/rllama/x86_64-darwin/libggml-blas.dylib
45
+ - lib/rllama/x86_64-darwin/libggml-cpu.dylib
46
+ - lib/rllama/x86_64-darwin/libggml-rpc.dylib
47
+ - lib/rllama/x86_64-darwin/libggml.dylib
48
+ - lib/rllama/x86_64-darwin/libllama.dylib
49
+ licenses:
50
+ - MIT
51
+ metadata:
52
+ bug_tracker_uri: https://github.com/docusealco/rllama/issues
53
+ homepage_uri: https://github.com/docusealco/rllama
54
+ source_code_uri: https://github.com/docusealco/rllama
55
+ rubygems_mfa_required: 'true'
56
+ rdoc_options: []
57
+ require_paths:
58
+ - lib
59
+ required_ruby_version: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ version: 3.1.0
64
+ required_rubygems_version: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ requirements: []
70
+ rubygems_version: 3.6.2
71
+ specification_version: 4
72
+ summary: Ruby bindings for Llama API
73
+ test_files: []