llama-rb 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +1 -1
- data/Gemfile.lock +1 -3
- data/bin/console +7 -0
- data/ext/Makefile +4 -0
- data/ext/extconf.rb +10 -0
- data/lib/llama/model.rb +36 -64
- data/lib/llama/version.rb +1 -1
- data/lib/llama.rb +0 -1
- data/llama-rb.gemspec +25 -25
- data/llama.cpp/LICENSE +21 -0
- data/llama.cpp/Makefile +175 -0
- data/llama.cpp/README.md +389 -0
- data/{ext/llama → llama.cpp/examples}/common.cpp +10 -3
- data/llama.cpp/examples/main/main.cpp +460 -0
- data/{ext/llama → llama.cpp}/ggml.c +587 -485
- data/{ext/llama → llama.cpp}/ggml.h +36 -26
- data/{ext/llama → llama.cpp}/llama.cpp +85 -35
- data/{ext/llama → llama.cpp}/llama.h +17 -0
- metadata +18 -27
- data/ext/llama/extconf.rb +0 -12
- data/ext/llama/model.cpp +0 -192
- /data/{ext/llama → llama.cpp/examples}/common.h +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 1371348a7ba9c4fa75ada41ec8afc6461e1d56dae2c3e3dede175d189ecdd7ea
+  data.tar.gz: b45a9ed3c28a228a2405ec8874f4cf8239dfcb4cb3132e7a44be806b5c6a2a78
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 88dd6f7a6f971f60753625dce11b469bbf46f606b4be4c8d308636d1f696666cacd9b174bda65bc5e42d503db413c9f1281c9a7129d838f1dfab3088717f603f
+  data.tar.gz: 449673e8950cc869ad899500b85a6108d2a02b7915ca340733bda0f18fa49691df7e839a6efece440d76a0583d037c90a6226f505eacc08ba24a9ae510b840bc
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,8 +1,7 @@
 PATH
   remote: .
   specs:
-    llama-rb (0.1.0)
-      rice (~> 4.0.4)
+    llama-rb (0.2.0)
 
 GEM
   remote: https://rubygems.org/
@@ -26,7 +25,6 @@ GEM
     rake (13.0.6)
     regexp_parser (2.7.0)
     rexml (3.2.5)
-    rice (4.0.4)
     rspec (3.12.0)
       rspec-core (~> 3.12.0)
       rspec-expectations (~> 3.12.0)
data/bin/console
ADDED
data/ext/Makefile
ADDED
data/ext/extconf.rb
ADDED
@@ -0,0 +1,10 @@
+require 'fileutils'
+
+root = File.dirname(__FILE__)
+llama_root = File.join(root, '..', 'llama.cpp')
+
+main = File.join(root, '..', 'bin', 'llama')
+llama_main = File.join(llama_root, 'main')
+
+Dir.chdir(llama_root) { system('make main', exception: true) }
+FileUtils.cp(llama_main, main)
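Note on the new build step: it no longer uses mkmf. Loading ext/extconf.rb compiles llama.cpp's `main` target and copies the binary to bin/llama as a side effect, and the four-line ext/Makefile added above is presumably a no-op stub so that RubyGems' usual make invocation after extconf still succeeds. The `exception: true` option to Kernel#system (Ruby 2.6+) turns a failed compile into a raised error, aborting gem installation instead of silently producing a gem without its binary. A minimal sketch of that behavior:

  # system normally returns false on a non-zero exit status (or nil if the
  # command cannot be spawned); with exception: true it raises instead,
  # so a broken llama.cpp build cannot go unnoticed.
  system('exit 1')                   # => false
  system('exit 1', exception: true)  # raises RuntimeError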
data/lib/llama/model.rb
CHANGED
@@ -1,86 +1,58 @@
-require '
+require 'open3'
+require 'shellwords'
 
 module Llama
   class Model
-
-
+    class ModelError < StandardError
+    end
 
-
-
-
-
-
-      seed: Time.now.to_i, # RNG seed
-      memory_f16: true, # use f16 instead of f32 for memory kv
-      use_mlock: false # use mlock to keep model in memory
+    def initialize(
+      model,
+      seed: Time.now.to_i,
+      n_predict: 128,
+      binary: default_binary
     )
-
-
-
-
-
-      @model = model
-      @n_ctx = n_ctx
-      @n_parts = n_parts
-      @seed = seed
-      @memory_f16 = memory_f16
-      @use_mlock = use_mlock
-
-      capture_stderr do
-        initialize_cpp(
-          model,
-          n_ctx,
-          n_parts,
-          seed,
-          memory_f16,
-          use_mlock,
-        )
-      end
-    end
-
-    instance
+      @model = model
+      @seed = seed
+      @n_predict = n_predict
+      @binary = binary
     end
-    # rubocop:enable Metrics/MethodLength
 
-    def predict(
-
-      n_predict: 128 # number of tokens to predict
-    )
-      text = ''
+    def predict(prompt)
+      stdout, @stderr, @status = Open3.capture3(command(prompt))
 
-
+      raise ModelError, "Error #{status.to_i}" unless status.success?
 
-
+      # remove the space that is added as a tokenizer hack in examples/main/main.cpp
+      stdout[0] = ''
+      stdout
     end
 
-    attr_reader :model, :
+    attr_reader :model, :seed, :n_predict, :binary
 
     private
 
-
-      previous = $stderr.dup
-      tmp = Tempfile.open('llama-rb-stderr')
-
-      begin
-        $stderr.reopen(tmp)
+    attr_reader :stderr, :status
 
-
-
-        tmp.rewind
-        @stderr = tmp.read
-      ensure
-        tmp.close(true)
-        $stderr.reopen(previous)
-      end
+    def default_binary
+      File.join(File.dirname(__FILE__), '..', '..', 'bin', 'llama')
     end
 
-    def
-
+    def command(prompt)
+      escape_command(binary,
+                     model: model,
+                     prompt: prompt,
+                     seed: seed,
+                     n_predict: n_predict)
+    end
 
-
-
+    def escape_command(command, **flags)
+      flags_string = flags.map do |key, value|
+        "--#{Shellwords.escape(key)} #{Shellwords.escape(value)}"
+      end.join(' ')
+      command_string = Shellwords.escape(command)
 
-
+      "#{command_string} #{flags_string}"
     end
   end
 end
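Taken together, Model#predict now shells out to the vendored llama.cpp binary via Open3.capture3 instead of calling into the removed Rice extension. A hypothetical usage sketch (the model path is an assumption; any ggml-format model file would do):

  require 'llama'

  # 'models/7B/ggml-model-q4_0.bin' is a placeholder path to a downloaded model
  model = Llama::Model.new(
    'models/7B/ggml-model-q4_0.bin',
    seed: 42,
    n_predict: 64
  )
  puts model.predict('The first person to walk on the Moon was')

Because the binary path, every flag name, and every flag value pass through Shellwords.escape, an untrusted prompt cannot inject shell syntax into the constructed command line.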
data/lib/llama/version.rb
CHANGED
data/lib/llama.rb
CHANGED
data/llama-rb.gemspec
CHANGED
@@ -5,7 +5,7 @@ Gem::Specification.new do |spec|
   spec.version = Llama::VERSION
   spec.licenses = ['MIT']
   spec.authors = ['zfletch']
-  spec.email = ['
+  spec.email = ['zf.rubygems@gmail.com']
 
   spec.summary = 'Ruby interface for Llama'
   spec.description = 'ggerganov/llama.cpp with Ruby hooks'
@@ -16,35 +16,35 @@ Gem::Specification.new do |spec|
   spec.metadata['source_code_uri'] = spec.homepage
   spec.metadata['changelog_uri'] = "#{spec.homepage}/releases"
 
-  # Specify which files should be added to the gem when it is released.
-  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
   spec.files = [
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    'Gemfile',
+    'Gemfile.lock',
+    'LICENSE',
+    'README.md',
+    'Rakefile',
+    'bin/console',
+    'ext/extconf.rb',
+    'ext/Makefile',
+    'lib/llama.rb',
+    'lib/llama/model.rb',
+    'lib/llama/version.rb',
+    'llama-rb.gemspec',
+    'llama.cpp/LICENSE',
+    'llama.cpp/Makefile',
+    'llama.cpp/README.md',
+    'llama.cpp/examples/common.cpp',
+    'llama.cpp/examples/common.h',
+    'llama.cpp/examples/main/main.cpp',
+    'llama.cpp/ggml.c',
+    'llama.cpp/ggml.h',
+    'llama.cpp/llama.cpp',
+    'llama.cpp/llama.h',
+    'models/.gitkeep',
   ]
   spec.bindir = 'exe'
   spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
   spec.require_paths = ['lib']
 
-  spec.
-
-  spec.extensions = %w[ext/llama/extconf.rb]
+  spec.extensions = %w[ext/extconf.rb]
   spec.metadata['rubygems_mfa_required'] = 'true'
 end
data/llama.cpp/LICENSE
ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 Georgi Gerganov
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
data/llama.cpp/Makefile
ADDED
@@ -0,0 +1,175 @@
+ifndef UNAME_S
+UNAME_S := $(shell uname -s)
+endif
+
+ifndef UNAME_P
+UNAME_P := $(shell uname -p)
+endif
+
+ifndef UNAME_M
+UNAME_M := $(shell uname -m)
+endif
+
+CCV := $(shell $(CC) --version | head -n 1)
+CXXV := $(shell $(CXX) --version | head -n 1)
+
+# Mac OS + Arm can report x86_64
+# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
+ifeq ($(UNAME_S),Darwin)
+	ifneq ($(UNAME_P),arm)
+		SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
+		ifeq ($(SYSCTL_M),1)
+			# UNAME_P := arm
+			# UNAME_M := arm64
+			warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
+		endif
+	endif
+endif
+
+#
+# Compile flags
+#
+
+# keep standard at C11 and C++11
+CFLAGS   = -I.              -O3 -DNDEBUG -std=c11   -fPIC
+CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC
+LDFLAGS  =
+
+# warnings
+CFLAGS   += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith -Wno-unused-function
+CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
+
+# OS specific
+# TODO: support Windows
+ifeq ($(UNAME_S),Linux)
+	CFLAGS   += -pthread
+	CXXFLAGS += -pthread
+endif
+ifeq ($(UNAME_S),Darwin)
+	CFLAGS   += -pthread
+	CXXFLAGS += -pthread
+endif
+ifeq ($(UNAME_S),FreeBSD)
+	CFLAGS   += -pthread
+	CXXFLAGS += -pthread
+endif
+ifeq ($(UNAME_S),NetBSD)
+	CFLAGS   += -pthread
+	CXXFLAGS += -pthread
+endif
+ifeq ($(UNAME_S),OpenBSD)
+	CFLAGS   += -pthread
+	CXXFLAGS += -pthread
+endif
+ifeq ($(UNAME_S),Haiku)
+	CFLAGS   += -pthread
+	CXXFLAGS += -pthread
+endif
+
+# Architecture specific
+# TODO: probably these flags need to be tweaked on some architectures
+#       feel free to update the Makefile for your architecture and send a pull request or issue
+ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))
+	# Use all CPU extensions that are available:
+	CFLAGS   += -march=native -mtune=native
+	CXXFLAGS += -march=native -mtune=native
+endif
+ifneq ($(filter ppc64%,$(UNAME_M)),)
+	POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
+	ifneq (,$(findstring POWER9,$(POWER9_M)))
+		CFLAGS   += -mcpu=power9
+		CXXFLAGS += -mcpu=power9
+	endif
+	# Require c++23's std::byteswap for big-endian support.
+	ifeq ($(UNAME_M),ppc64)
+		CXXFLAGS += -std=c++23 -DGGML_BIG_ENDIAN
+	endif
+endif
+ifndef LLAMA_NO_ACCELERATE
+	# Mac M1 - include Accelerate framework.
+	# `-framework Accelerate` works on Mac Intel as well, with negliable performance boost (as of the predict time).
+	ifeq ($(UNAME_S),Darwin)
+		CFLAGS  += -DGGML_USE_ACCELERATE
+		LDFLAGS += -framework Accelerate
+	endif
+endif
+ifdef LLAMA_OPENBLAS
+	CFLAGS  += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
+	LDFLAGS += -lopenblas
+endif
+ifdef LLAMA_GPROF
+	CFLAGS   += -pg
+	CXXFLAGS += -pg
+endif
+ifneq ($(filter aarch64%,$(UNAME_M)),)
+	CFLAGS   += -mcpu=native
+	CXXFLAGS += -mcpu=native
+endif
+ifneq ($(filter armv6%,$(UNAME_M)),)
+	# Raspberry Pi 1, 2, 3
+	CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
+endif
+ifneq ($(filter armv7%,$(UNAME_M)),)
+	# Raspberry Pi 4
+	CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
+endif
+ifneq ($(filter armv8%,$(UNAME_M)),)
+	# Raspberry Pi 4
+	CFLAGS += -mfp16-format=ieee -mno-unaligned-access
+endif
+
+#
+# Print build information
+#
+
+$(info I llama.cpp build info: )
+$(info I UNAME_S:  $(UNAME_S))
+$(info I UNAME_P:  $(UNAME_P))
+$(info I UNAME_M:  $(UNAME_M))
+$(info I CFLAGS:   $(CFLAGS))
+$(info I CXXFLAGS: $(CXXFLAGS))
+$(info I LDFLAGS:  $(LDFLAGS))
+$(info I CC:       $(CCV))
+$(info I CXX:      $(CXXV))
+$(info )
+
+default: main quantize perplexity embedding
+
+#
+# Build library
+#
+
+ggml.o: ggml.c ggml.h
+	$(CC)  $(CFLAGS)   -c ggml.c -o ggml.o
+
+llama.o: llama.cpp llama.h
+	$(CXX) $(CXXFLAGS) -c llama.cpp -o llama.o
+
+common.o: examples/common.cpp examples/common.h
+	$(CXX) $(CXXFLAGS) -c examples/common.cpp -o common.o
+
+clean:
+	rm -vf *.o main quantize perplexity embedding
+
+main: examples/main/main.cpp ggml.o llama.o common.o
+	$(CXX) $(CXXFLAGS) examples/main/main.cpp ggml.o llama.o common.o -o main $(LDFLAGS)
+	@echo
+	@echo '====  Run ./main -h for help.  ===='
+	@echo
+
+quantize: examples/quantize/quantize.cpp ggml.o llama.o
+	$(CXX) $(CXXFLAGS) examples/quantize/quantize.cpp ggml.o llama.o -o quantize $(LDFLAGS)
+
+perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o common.o
+	$(CXX) $(CXXFLAGS) examples/perplexity/perplexity.cpp ggml.o llama.o common.o -o perplexity $(LDFLAGS)
+
+embedding: examples/embedding/embedding.cpp ggml.o llama.o common.o
+	$(CXX) $(CXXFLAGS) examples/embedding/embedding.cpp ggml.o llama.o common.o -o embedding $(LDFLAGS)
+
+#
+# Tests
+#
+
+.PHONY: tests
+tests:
+	bash ./tests/run-tests.sh