rllama 1.0.0-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +214 -0
- data/bin/rllama +8 -0
- data/lib/rllama/cli.rb +183 -0
- data/lib/rllama/context.rb +233 -0
- data/lib/rllama/cpp.rb +690 -0
- data/lib/rllama/loader.rb +210 -0
- data/lib/rllama/model.rb +103 -0
- data/lib/rllama/version.rb +5 -0
- data/lib/rllama/x86_64-darwin/libggml-base.dylib +0 -0
- data/lib/rllama/x86_64-darwin/libggml-blas.dylib +0 -0
- data/lib/rllama/x86_64-darwin/libggml-cpu.dylib +0 -0
- data/lib/rllama/x86_64-darwin/libggml-rpc.dylib +0 -0
- data/lib/rllama/x86_64-darwin/libggml.dylib +0 -0
- data/lib/rllama/x86_64-darwin/libllama.dylib +0 -0
- data/lib/rllama.rb +37 -0
- metadata +73 -0
@@ -0,0 +1,210 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'uri'
|
4
|
+
require 'net/http'
|
5
|
+
require 'fileutils'
|
6
|
+
|
7
|
+
module Rllama
  # Resolves model identifiers — local paths, plain URLs, or HuggingFace
  # "org/repo/file.gguf" specs — to a local file, downloading when needed.
  module Loader
    # Base URL used to build HuggingFace "resolve" download links.
    HUGGINGFACE_BASE_URL = 'https://huggingface.co'
    # Default cache root; downloads land under "#{DEFAULT_DIR}/models".
    DEFAULT_DIR = File.join(Dir.home, '.rllama')

    # 1024-based unit suffixes consumed by format_bytes.
    UNITS = %w[B KB MB GB TB].freeze

    module_function
|
15
|
+
|
16
|
+
# Resolves a model reference to a local file path. Existing local files
# are returned as-is; URLs and HuggingFace specs are downloaded into
# "<dir>/models" (dir defaults to DEFAULT_DIR). Raises Error for
# anything unrecognized.
def resolve(path_or_name, dir: nil)
  models_dir = File.join(dir || DEFAULT_DIR, 'models')

  return path_or_name if local_file?(path_or_name)

  if url?(path_or_name)
    download_from_url(path_or_name, models_dir)
  elsif huggingface_path?(path_or_name)
    download_from_huggingface(path_or_name, models_dir)
  else
    raise Error, "Invalid model path or name: #{path_or_name}"
  end
end
|
31
|
+
|
32
|
+
# True when path points to an existing regular file. Uses File.file?
# rather than File.exist? so directories are rejected — a model path
# must be a loadable file, not a folder.
def local_file?(path)
  File.file?(path)
end
|
35
|
+
|
36
|
+
# True when the string parses as an http:// or https:// URL; false for
# other schemes, relative paths, and strings that fail to parse at all.
def url?(path)
  case URI.parse(path)
  when URI::HTTP, URI::HTTPS
    true
  else
    false
  end
rescue URI::InvalidURIError
  false
end
|
43
|
+
|
44
|
+
# True for HuggingFace-style specs: a relative path (no leading slash,
# no URL scheme) with at least three segments whose last segment is a
# .gguf file, e.g. "org/repo/model.gguf".
def huggingface_path?(path)
  return false if path.start_with?('/') || path.include?('://')

  segments = path.split('/')

  segments.length >= 3 && segments.last.end_with?('.gguf')
end
|
51
|
+
|
52
|
+
# Splits an "org/repo/path/to/file.gguf" spec, builds the corresponding
# HuggingFace "resolve/main" URL, and downloads it to
# "<dir>/<org>/<repo>/<file_path>". Raises Error for specs with fewer
# than three segments.
def download_from_huggingface(hf_path, dir)
  org, repo, *file_segments = hf_path.split('/')

  raise Error, "Invalid HuggingFace path: #{hf_path}" if file_segments.empty?

  file_path = file_segments.join('/')

  url = "#{HUGGINGFACE_BASE_URL}/#{org}/#{repo}/resolve/main/#{file_path}"
  local_path = File.join(dir, org, repo, file_path)

  puts "Destination: #{local_path}"

  download_file(url, local_path, "HuggingFace model: #{hf_path}")
end
|
69
|
+
|
70
|
+
# Downloads an arbitrary URL into `dir`, naming the file after the last
# path component of the URL.
def download_from_url(url, dir)
  filename = File.basename(URI.parse(url).path)
  local_path = File.join(dir, filename)

  puts "Destination: #{local_path}"

  download_file(url, local_path, "URL: #{url}")
end
|
81
|
+
|
82
|
+
# Streams `url` into `local_path`, writing to a "~"-prefixed temp file in
# the same directory so interrupted downloads can be resumed with HTTP
# Range requests. Prints progress to stdout. Returns local_path; if the
# destination already exists it is returned without re-downloading.
#
# `description` is only forwarded on recursive calls and is never
# displayed anywhere in this method.
#
# NOTE(review): redirect handling recurses with no depth limit — a
# redirect loop would recurse until SystemStackError; consider bounding it.
def download_file(url, local_path, description)
  FileUtils.mkdir_p(File.dirname(local_path))

  # Already fully downloaded on a previous run.
  return local_path if File.exist?(local_path)

  # Partial downloads accumulate in "~<name>" next to the final file.
  temp_path = File.join(File.dirname(local_path), "~#{File.basename(local_path)}")

  existing_size = File.exist?(temp_path) ? File.size(temp_path) : 0

  uri = URI.parse(url)

  Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
    request = Net::HTTP::Get.new(uri.request_uri)

    # Ask the server to resume from where the temp file left off.
    request['Range'] = "bytes=#{existing_size}-" if existing_size.positive?

    http.request(request) do |response|
      case response
      when Net::HTTPSuccess, Net::HTTPPartialContent
        if response['Content-Range']
          # Resumed (206): total size is the figure after "/" in
          # "bytes start-end/total".
          total_size = response['Content-Range'].split('/').last.to_i
        else
          total_size = response['content-length'].to_i

          # We asked for a Range but got a plain 200: the server ignored
          # it, so discard the partial file and start over.
          if existing_size.positive? && response.code == '200'
            puts "\nServer doesn't support resume, starting from beginning..."

            existing_size = 0

            FileUtils.rm_f(temp_path)
          end
        end

        downloaded = existing_size
        # Append when resuming, truncate when starting fresh.
        file_mode = existing_size.positive? ? 'ab' : 'wb'

        File.open(temp_path, file_mode) do |file|
          response.read_body do |chunk|
            file.write(chunk)
            downloaded += chunk.size

            if total_size.positive?
              progress = (downloaded.to_f / total_size * 100).round
              total_str = format_bytes(total_size)
              downloaded_str = format_bytes(downloaded)
              # Right-pad the running count to the total's width so the
              # \r-overwritten progress line doesn't jitter.
              padding = total_str.length
              formatted_downloaded = format("%#{padding}s", downloaded_str)
              print format("\rProgress: %<progress>6d%% (%<downloaded>s / %<total>s)",
                           progress: progress, downloaded: formatted_downloaded, total: total_str)
            else
              # Unknown total (no usable length header): show bytes only.
              print "\rDownloaded: #{format_bytes(downloaded)}"
            end
          end
        end

        # Size check against the advertised total; skipped when unknown.
        unless verify_download(temp_path, total_size)
          FileUtils.rm_f(temp_path)

          raise Error, 'Download verification failed - file size mismatch'
        end

        # Atomic promotion of the finished temp file.
        File.rename(temp_path, local_path)

        puts
      when Net::HTTPRedirection
        redirect_url = response['location']

        # Location may be relative; resolve it against the request URL.
        redirect_url = URI.join(url, redirect_url).to_s unless redirect_url.start_with?('http://', 'https://')

        return download_file(redirect_url, local_path, description)
      when Net::HTTPRequestedRangeNotSatisfiable
        # 416: our resume offset is at/past the end of the resource —
        # the temp file may in fact already be complete.
        if File.exist?(temp_path)
          uri = URI.parse(url)

          Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |check_http|
            head_request = Net::HTTP::Head.new(uri.request_uri)
            head_response = check_http.request(head_request)

            if head_response.is_a?(Net::HTTPSuccess)
              expected_size = head_response['content-length'].to_i
              actual_size = File.size(temp_path)

              # Temp file matches the full size: promote it as done.
              if expected_size.positive? && expected_size == actual_size
                File.rename(temp_path, local_path)

                return local_path
              end
            end
          end

          # Stale/oversized partial: discard and retry from scratch
          # (existing_size will be 0, so no Range header next time).
          File.delete(temp_path)

          return download_file(url, local_path, description)
        end

        raise Error, "Range request failed: #{response.code} #{response.message}"
      else
        raise Error, "Failed to download model: #{response.code} #{response.message}"
      end
    end
  end

  local_path
end
|
186
|
+
|
187
|
+
# Verifies a finished download by size. Trivially true when the expected
# size is unknown (zero or negative); otherwise compares against the
# actual on-disk size.
def verify_download(local_path, expected_size)
  expected_size <= 0 || File.size(local_path) == expected_size
end
|
193
|
+
|
194
|
+
# Renders a byte count as a human-readable string using 1024-based
# units from UNITS: whole numbers up to MB, two decimals for GB/TB.
def format_bytes(bytes)
  return '0 B' if bytes.zero?

  exponent = [(Math.log(bytes) / Math.log(1024)).floor, UNITS.length - 1].min
  scaled = bytes.to_f / (1024**exponent)

  if exponent >= 3
    format('%.2f %s', scaled, UNITS[exponent])
  else
    format('%d %s', scaled.round, UNITS[exponent])
  end
end
|
209
|
+
end
|
210
|
+
end
|
data/lib/rllama/model.rb
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Rllama
|
4
|
+
class Model
  # Default context window in tokens (8192) for #generate and #init_context.
  DEFAULT_CONTEXT_LENGTH = 2**13

  # Raw FFI pointer to the underlying llama.cpp model.
  attr_reader :pointer

  # Loads a GGUF model. path_or_name may be a local path, URL, or
  # HuggingFace "org/repo/file.gguf" spec (resolution handled by Loader).
  #
  # Raises Error when llama.cpp cannot load the file.
  def initialize(path_or_name, dir: nil)
    resolved_path = Loader.resolve(path_or_name, dir:)

    model_params = Cpp.llama_model_default_params

    @pointer = Cpp.llama_model_load_from_file(resolved_path, model_params)

    raise Error, "Unable to load model from #{resolved_path}" if @pointer.null?
  end

  # The chat template string bundled with the model (memoized; may be
  # nil for models that ship without one).
  def chat_template
    @chat_template ||= Cpp.llama_model_chat_template(@pointer, nil)
  end

  # Vocabulary handle for this model (memoized).
  def vocab
    @vocab ||= Cpp.llama_model_get_vocab(@pointer)
  end

  # Embedding dimensionality (memoized).
  def n_embd
    @n_embd ||= Cpp.llama_model_n_embd(@pointer)
  end

  # Maximum number of parallel sequences supported by llama.cpp (memoized).
  def n_seq_max
    @n_seq_max ||= Cpp.llama_max_parallel_sequences
  end

  # Context length the model was trained with (memoized).
  def n_ctx_train
    @n_ctx_train ||= Cpp.llama_model_n_ctx_train(@pointer)
  end

  # Generates a completion for `prompt` in a throwaway context sized to
  # `max_tokens`, streaming chunks to the block when one is given.
  def generate(prompt, max_tokens: DEFAULT_CONTEXT_LENGTH, temperature: 0.8, top_k: 40, top_p: 0.95, min_p: 0.05,
               seed: nil, system: nil, &block)
    init_context(n_ctx: max_tokens) do |ctx|
      ctx.generate(prompt, max_tokens: ctx.n_ctx,
                           temperature:, top_k:, top_p:, seed:, system:, min_p:,
                           &block)
    end
  end
  alias message generate

  # Computes embeddings for `prompt` in a throwaway embedding context.
  def embed(prompt, normalize: true, batch_size: 512, &block)
    init_embedding_context do |ctx|
      ctx.embed(prompt, normalize:, batch_size:, &block)
    end
  end

  # Frees the native model. The instance must not be used afterwards.
  def close
    Cpp.llama_model_free(@pointer)
  end

  # Creates a Context for this model. With a block, yields the context
  # and always closes it — even when the block raises — returning the
  # block's value (mirrors Rllama.load_model). Without a block, the
  # caller owns the context and must close it.
  def init_context(embeddings: false, n_ctx: DEFAULT_CONTEXT_LENGTH, n_batch: 512)
    context = Context.new(self, embeddings:, n_ctx:, n_batch:)

    return context unless block_given?

    begin
      yield context
    ensure
      # Ensure the native context is released even if the block raises.
      context.close
    end
  end

  # Convenience wrapper for an embeddings-enabled context.
  def init_embedding_context(n_ctx: 2048, n_batch: 512, &)
    init_context(embeddings: true, n_ctx:, n_batch:, &)
  end

  # Renders chat `messages` (hashes with :role and :content) through the
  # model's chat template and returns the resulting prompt string.
  #
  # Raises Error when the model has no template or rendering fails.
  def build_chat_template(messages)
    raise Error, 'Model does not provide a chat template' if chat_template.nil? || chat_template.empty?

    count = messages.length
    struct_size = Cpp::LlamaChatMessage.size
    array_ptr = FFI::MemoryPointer.new(struct_size * count)

    # Assigning a MemoryPointer to a struct :pointer field stores only
    # the raw address — the struct holds no Ruby reference. Keep the
    # string buffers alive in this array so they cannot be garbage-
    # collected before llama_chat_apply_template reads them.
    native_strings = []

    messages.each_with_index do |m, i|
      struct_ptr = array_ptr + (i * struct_size)
      msg_struct = Cpp::LlamaChatMessage.new(struct_ptr)

      role_ptr = FFI::MemoryPointer.from_string(m[:role].to_s)
      content_ptr = FFI::MemoryPointer.from_string(m[:content].to_s)
      native_strings << role_ptr << content_ptr

      msg_struct[:role] = role_ptr
      msg_struct[:content] = content_ptr
    end

    # First call with a nil buffer returns the required size; second
    # call renders into the allocated buffer.
    needed = Cpp.llama_chat_apply_template(chat_template, array_ptr, count, true, nil, 0)

    raise Error, 'Failed to apply chat template' if needed.negative?

    buf = FFI::MemoryPointer.new(:char, needed)
    written = Cpp.llama_chat_apply_template(chat_template, array_ptr, count, true, buf, needed)

    raise Error, 'Failed to apply chat template' if written.negative?

    buf.read_string(written)
  end
end
|
103
|
+
end
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/rllama.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Top-level namespace for the rllama gem: lazy-loads its components and
# exposes convenience module functions.
module Rllama
  autoload :Model, 'rllama/model'
  autoload :Loader, 'rllama/loader'
  autoload :Context, 'rllama/context'
  autoload :Cpp, 'rllama/cpp'
  autoload :Cli, 'rllama/cli'
  autoload :VERSION, 'rllama/version'

  # Value object returned by generation calls: generated text plus stats.
  Result = Struct.new(:text, :stats, keyword_init: true)
  # Base error class raised throughout the gem.
  Error = Class.new(StandardError)

  module_function

  # Loads a model by path/name. With a block, yields the model and
  # always frees it afterwards (even on error), returning the block's
  # value; without a block, returns the model and the caller must close it.
  def load_model(path_or_name, dir: nil)
    model = Model.new(path_or_name, dir:)

    return model unless block_given?

    begin
      yield model
    ensure
      model.close
    end
  end

  # Suppresses llama.cpp's native log output.
  def silence_log!
    Cpp.silence_log!
  end

  # Routes llama.cpp's native log output to the given IO.
  def set_log(io = $stdout)
    Cpp.set_log(io)
  end
end
|
metadata
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rllama
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: x86_64-darwin
|
6
|
+
authors:
|
7
|
+
- Pete Matsyburka
|
8
|
+
bindir: bin
|
9
|
+
cert_chain: []
|
10
|
+
date: 2025-10-05 00:00:00.000000000 Z
|
11
|
+
dependencies:
|
12
|
+
- !ruby/object:Gem::Dependency
|
13
|
+
name: ffi
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
15
|
+
requirements:
|
16
|
+
- - ">="
|
17
|
+
- !ruby/object:Gem::Version
|
18
|
+
version: '1.0'
|
19
|
+
type: :runtime
|
20
|
+
prerelease: false
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
22
|
+
requirements:
|
23
|
+
- - ">="
|
24
|
+
- !ruby/object:Gem::Version
|
25
|
+
version: '1.0'
|
26
|
+
description: Ruby bindings for Llama.cpp to run local LLMs in Ruby applications.
|
27
|
+
email:
|
28
|
+
- pete@docuseal.com
|
29
|
+
executables:
|
30
|
+
- rllama
|
31
|
+
extensions: []
|
32
|
+
extra_rdoc_files: []
|
33
|
+
files:
|
34
|
+
- README.md
|
35
|
+
- bin/rllama
|
36
|
+
- lib/rllama.rb
|
37
|
+
- lib/rllama/cli.rb
|
38
|
+
- lib/rllama/context.rb
|
39
|
+
- lib/rllama/cpp.rb
|
40
|
+
- lib/rllama/loader.rb
|
41
|
+
- lib/rllama/model.rb
|
42
|
+
- lib/rllama/version.rb
|
43
|
+
- lib/rllama/x86_64-darwin/libggml-base.dylib
|
44
|
+
- lib/rllama/x86_64-darwin/libggml-blas.dylib
|
45
|
+
- lib/rllama/x86_64-darwin/libggml-cpu.dylib
|
46
|
+
- lib/rllama/x86_64-darwin/libggml-rpc.dylib
|
47
|
+
- lib/rllama/x86_64-darwin/libggml.dylib
|
48
|
+
- lib/rllama/x86_64-darwin/libllama.dylib
|
49
|
+
licenses:
|
50
|
+
- MIT
|
51
|
+
metadata:
|
52
|
+
bug_tracker_uri: https://github.com/docusealco/rllama/issues
|
53
|
+
homepage_uri: https://github.com/docusealco/rllama
|
54
|
+
source_code_uri: https://github.com/docusealco/rllama
|
55
|
+
rubygems_mfa_required: 'true'
|
56
|
+
rdoc_options: []
|
57
|
+
require_paths:
|
58
|
+
- lib
|
59
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
60
|
+
requirements:
|
61
|
+
- - ">="
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: 3.1.0
|
64
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
requirements: []
|
70
|
+
rubygems_version: 3.6.2
|
71
|
+
specification_version: 4
|
72
|
+
summary: Ruby bindings for Llama API
|
73
|
+
test_files: []
|