llama-rb 0.1.0 → 0.2.0
- checksums.yaml +4 -4
- data/Gemfile +1 -1
- data/Gemfile.lock +1 -3
- data/bin/console +7 -0
- data/ext/Makefile +4 -0
- data/ext/extconf.rb +10 -0
- data/lib/llama/model.rb +36 -64
- data/lib/llama/version.rb +1 -1
- data/lib/llama.rb +0 -1
- data/llama-rb.gemspec +25 -25
- data/llama.cpp/LICENSE +21 -0
- data/llama.cpp/Makefile +175 -0
- data/llama.cpp/README.md +389 -0
- data/{ext/llama → llama.cpp/examples}/common.cpp +10 -3
- data/llama.cpp/examples/main/main.cpp +460 -0
- data/{ext/llama → llama.cpp}/ggml.c +587 -485
- data/{ext/llama → llama.cpp}/ggml.h +36 -26
- data/{ext/llama → llama.cpp}/llama.cpp +85 -35
- data/{ext/llama → llama.cpp}/llama.h +17 -0
- metadata +18 -27
- data/ext/llama/extconf.rb +0 -12
- data/ext/llama/model.cpp +0 -192
- /data/{ext/llama → llama.cpp/examples}/common.h +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: …
-  data.tar.gz: …
+  metadata.gz: 1371348a7ba9c4fa75ada41ec8afc6461e1d56dae2c3e3dede175d189ecdd7ea
+  data.tar.gz: b45a9ed3c28a228a2405ec8874f4cf8239dfcb4cb3132e7a44be806b5c6a2a78
 SHA512:
-  metadata.gz: …
-  data.tar.gz: …
+  metadata.gz: 88dd6f7a6f971f60753625dce11b469bbf46f606b4be4c8d308636d1f696666cacd9b174bda65bc5e42d503db413c9f1281c9a7129d838f1dfab3088717f603f
+  data.tar.gz: 449673e8950cc869ad899500b85a6108d2a02b7915ca340733bda0f18fa49691df7e839a6efece440d76a0583d037c90a6226f505eacc08ba24a9ae510b840bc
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,8 +1,7 @@
 PATH
   remote: .
   specs:
-    llama-rb (0.1.0)
-      rice (~> 4.0.4)
+    llama-rb (0.2.0)
 
 GEM
   remote: https://rubygems.org/
@@ -26,7 +25,6 @@ GEM
     rake (13.0.6)
     regexp_parser (2.7.0)
     rexml (3.2.5)
-    rice (4.0.4)
     rspec (3.12.0)
       rspec-core (~> 3.12.0)
       rspec-expectations (~> 3.12.0)
data/bin/console
ADDED
data/ext/Makefile
ADDED
data/ext/extconf.rb
ADDED
@@ -0,0 +1,10 @@
+require 'fileutils'
+
+root = File.dirname(__FILE__)
+llama_root = File.join(root, '..', 'llama.cpp')
+
+main = File.join(root, '..', 'bin', 'llama')
+llama_main = File.join(llama_root, 'main')
+
+Dir.chdir(llama_root) { system('make main', exception: true) }
+FileUtils.cp(llama_main, main)
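The new ext/extconf.rb replaces the previous Rice-based C++ extension (ext/llama) with a plain build script: when RubyGems runs it at install time (the gemspec now lists ext/extconf.rb in spec.extensions), it shells out to the vendored llama.cpp Makefile and copies the resulting main binary to bin/llama, which lib/llama/model.rb then invokes as a subprocess. A minimal sketch of reproducing that build step by hand from the root of a llama-rb checkout, assuming a working C/C++ toolchain (paths mirror the extconf above):

require 'fileutils'

# Build llama.cpp's `main` example and stage it where the gem expects it.
Dir.chdir('llama.cpp') { system('make main', exception: true) }
FileUtils.mkdir_p('bin')
FileUtils.cp(File.join('llama.cpp', 'main'), File.join('bin', 'llama'))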
data/lib/llama/model.rb
CHANGED
@@ -1,86 +1,58 @@
-require '…
+require 'open3'
+require 'shellwords'
 
 module Llama
   class Model
-…
-…
+    class ModelError < StandardError
+    end
 
-…
-…
-…
-…
-…
-      seed: Time.now.to_i, # RNG seed
-      memory_f16: true, # use f16 instead of f32 for memory kv
-      use_mlock: false # use mlock to keep model in memory
+    def initialize(
+      model,
+      seed: Time.now.to_i,
+      n_predict: 128,
+      binary: default_binary
     )
-…
-…
-…
-…
-…
-      @model = model
-      @n_ctx = n_ctx
-      @n_parts = n_parts
-      @seed = seed
-      @memory_f16 = memory_f16
-      @use_mlock = use_mlock
-…
-      capture_stderr do
-        initialize_cpp(
-          model,
-          n_ctx,
-          n_parts,
-          seed,
-          memory_f16,
-          use_mlock,
-        )
-      end
-    end
-…
-    instance…
+      @model = model
+      @seed = seed
+      @n_predict = n_predict
+      @binary = binary
     end
-    # rubocop:enable Metrics/MethodLength
 
-    def predict(…
-…
-      n_predict: 128 # number of tokens to predict
-    )
-      text = ''
+    def predict(prompt)
+      stdout, @stderr, @status = Open3.capture3(command(prompt))
 
-…
+      raise ModelError, "Error #{status.to_i}" unless status.success?
 
-…
+      # remove the space that is added as a tokenizer hack in examples/main/main.cpp
+      stdout[0] = ''
+      stdout
     end
 
-    attr_reader :model, :…
+    attr_reader :model, :seed, :n_predict, :binary
 
     private
 
-…
-      previous = $stderr.dup
-      tmp = Tempfile.open('llama-rb-stderr')
-…
-      begin
-        $stderr.reopen(tmp)
+    attr_reader :stderr, :status
 
-…
-…
-        tmp.rewind
-        @stderr = tmp.read
-      ensure
-        tmp.close(true)
-        $stderr.reopen(previous)
-      end
+    def default_binary
+      File.join(File.dirname(__FILE__), '..', '..', 'bin', 'llama')
+    end
 
-    def …
-…
+    def command(prompt)
+      escape_command(binary,
+                     model: model,
+                     prompt: prompt,
+                     seed: seed,
+                     n_predict: n_predict)
+    end
 
-…
-…
+    def escape_command(command, **flags)
+      flags_string = flags.map do |key, value|
+        "--#{Shellwords.escape(key)} #{Shellwords.escape(value)}"
+      end.join(' ')
+      command_string = Shellwords.escape(command)
 
-…
+      "#{command_string} #{flags_string}"
     end
   end
 end
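With this rewrite, Model no longer drives llama.cpp in-process through the removed Rice extension; predict shells out to the compiled bin/llama binary via Open3.capture3, raises ModelError on a non-zero exit status, and returns the binary's standard output. A minimal usage sketch of the new API; the model path and prompt are illustrative, any llama.cpp-compatible ggml model file should work:

require 'llama'

# Placeholder path: point this at a ggml model file converted/quantized
# with llama.cpp's tools.
model = Llama::Model.new(
  'models/7B/ggml-model-q4_0.bin',
  seed: 42,
  n_predict: 16
)

puts model.predict('The capital of France is')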
data/lib/llama/version.rb
CHANGED
data/lib/llama.rb
CHANGED
data/llama-rb.gemspec
CHANGED
@@ -5,7 +5,7 @@ Gem::Specification.new do |spec|
   spec.version = Llama::VERSION
   spec.licenses = ['MIT']
   spec.authors = ['zfletch']
-  spec.email = ['…
+  spec.email = ['zf.rubygems@gmail.com']
 
   spec.summary = 'Ruby interface for Llama'
   spec.description = 'ggerganov/llama.cpp with Ruby hooks'
@@ -16,35 +16,35 @@ Gem::Specification.new do |spec|
   spec.metadata['source_code_uri'] = spec.homepage
   spec.metadata['changelog_uri'] = "#{spec.homepage}/releases"
 
-  # Specify which files should be added to the gem when it is released.
-  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
   spec.files = [
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
+    'Gemfile',
+    'Gemfile.lock',
+    'LICENSE',
+    'README.md',
+    'Rakefile',
+    'bin/console',
+    'ext/extconf.rb',
+    'ext/Makefile',
+    'lib/llama.rb',
+    'lib/llama/model.rb',
+    'lib/llama/version.rb',
+    'llama-rb.gemspec',
+    'llama.cpp/LICENSE',
+    'llama.cpp/Makefile',
+    'llama.cpp/README.md',
+    'llama.cpp/examples/common.cpp',
+    'llama.cpp/examples/common.h',
+    'llama.cpp/examples/main/main.cpp',
+    'llama.cpp/ggml.c',
+    'llama.cpp/ggml.h',
+    'llama.cpp/llama.cpp',
+    'llama.cpp/llama.h',
+    'models/.gitkeep',
   ]
   spec.bindir = 'exe'
   spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
   spec.require_paths = ['lib']
 
-  spec.…
-…
-  spec.extensions = %w[ext/llama/extconf.rb]
+  spec.extensions = %w[ext/extconf.rb]
   spec.metadata['rubygems_mfa_required'] = 'true'
 end
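The packaged file list now enumerates the vendored llama.cpp sources explicitly, and spec.extensions points at the new top-level ext/extconf.rb. A quick, illustrative sanity check (not part of the gem) that could be run from the repository root to confirm every declared file exists before packaging:

# Load the gemspec and report any declared file missing from the working tree.
spec = Gem::Specification.load('llama-rb.gemspec')
missing = spec.files.reject { |f| File.exist?(f) }
puts missing.empty? ? 'All gemspec files present.' : "Missing: #{missing.join(', ')}"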
data/llama.cpp/LICENSE
ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 Georgi Gerganov
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
data/llama.cpp/Makefile
ADDED
@@ -0,0 +1,175 @@
+ifndef UNAME_S
+UNAME_S := $(shell uname -s)
+endif
+
+ifndef UNAME_P
+UNAME_P := $(shell uname -p)
+endif
+
+ifndef UNAME_M
+UNAME_M := $(shell uname -m)
+endif
+
+CCV := $(shell $(CC) --version | head -n 1)
+CXXV := $(shell $(CXX) --version | head -n 1)
+
+# Mac OS + Arm can report x86_64
+# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
+ifeq ($(UNAME_S),Darwin)
+	ifneq ($(UNAME_P),arm)
+		SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
+		ifeq ($(SYSCTL_M),1)
+			# UNAME_P := arm
+			# UNAME_M := arm64
+			warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
+		endif
+	endif
+endif
+
+#
+# Compile flags
+#
+
+# keep standard at C11 and C++11
+CFLAGS = -I. -O3 -DNDEBUG -std=c11 -fPIC
+CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC
+LDFLAGS =
+
+# warnings
+CFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith -Wno-unused-function
+CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
+
+# OS specific
+# TODO: support Windows
+ifeq ($(UNAME_S),Linux)
+	CFLAGS += -pthread
+	CXXFLAGS += -pthread
+endif
+ifeq ($(UNAME_S),Darwin)
+	CFLAGS += -pthread
+	CXXFLAGS += -pthread
+endif
+ifeq ($(UNAME_S),FreeBSD)
+	CFLAGS += -pthread
+	CXXFLAGS += -pthread
+endif
+ifeq ($(UNAME_S),NetBSD)
+	CFLAGS += -pthread
+	CXXFLAGS += -pthread
+endif
+ifeq ($(UNAME_S),OpenBSD)
+	CFLAGS += -pthread
+	CXXFLAGS += -pthread
+endif
+ifeq ($(UNAME_S),Haiku)
+	CFLAGS += -pthread
+	CXXFLAGS += -pthread
+endif
+
+# Architecture specific
+# TODO: probably these flags need to be tweaked on some architectures
+# feel free to update the Makefile for your architecture and send a pull request or issue
+ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))
+	# Use all CPU extensions that are available:
+	CFLAGS += -march=native -mtune=native
+	CXXFLAGS += -march=native -mtune=native
+endif
+ifneq ($(filter ppc64%,$(UNAME_M)),)
+	POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
+	ifneq (,$(findstring POWER9,$(POWER9_M)))
+		CFLAGS += -mcpu=power9
+		CXXFLAGS += -mcpu=power9
+	endif
+	# Require c++23's std::byteswap for big-endian support.
+	ifeq ($(UNAME_M),ppc64)
+		CXXFLAGS += -std=c++23 -DGGML_BIG_ENDIAN
+	endif
+endif
+ifndef LLAMA_NO_ACCELERATE
+	# Mac M1 - include Accelerate framework.
+	# `-framework Accelerate` works on Mac Intel as well, with negliable performance boost (as of the predict time).
+	ifeq ($(UNAME_S),Darwin)
+		CFLAGS += -DGGML_USE_ACCELERATE
+		LDFLAGS += -framework Accelerate
+	endif
+endif
+ifdef LLAMA_OPENBLAS
+	CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
+	LDFLAGS += -lopenblas
+endif
+ifdef LLAMA_GPROF
+	CFLAGS += -pg
+	CXXFLAGS += -pg
+endif
+ifneq ($(filter aarch64%,$(UNAME_M)),)
+	CFLAGS += -mcpu=native
+	CXXFLAGS += -mcpu=native
+endif
+ifneq ($(filter armv6%,$(UNAME_M)),)
+	# Raspberry Pi 1, 2, 3
+	CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
+endif
+ifneq ($(filter armv7%,$(UNAME_M)),)
+	# Raspberry Pi 4
+	CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
+endif
+ifneq ($(filter armv8%,$(UNAME_M)),)
+	# Raspberry Pi 4
+	CFLAGS += -mfp16-format=ieee -mno-unaligned-access
+endif
+
+#
+# Print build information
+#
+
+$(info I llama.cpp build info: )
+$(info I UNAME_S: $(UNAME_S))
+$(info I UNAME_P: $(UNAME_P))
+$(info I UNAME_M: $(UNAME_M))
+$(info I CFLAGS: $(CFLAGS))
+$(info I CXXFLAGS: $(CXXFLAGS))
+$(info I LDFLAGS: $(LDFLAGS))
+$(info I CC: $(CCV))
+$(info I CXX: $(CXXV))
+$(info )
+
+default: main quantize perplexity embedding
+
+#
+# Build library
+#
+
+ggml.o: ggml.c ggml.h
+	$(CC) $(CFLAGS) -c ggml.c -o ggml.o
+
+llama.o: llama.cpp llama.h
+	$(CXX) $(CXXFLAGS) -c llama.cpp -o llama.o
+
+common.o: examples/common.cpp examples/common.h
+	$(CXX) $(CXXFLAGS) -c examples/common.cpp -o common.o
+
+clean:
+	rm -vf *.o main quantize perplexity embedding
+
+main: examples/main/main.cpp ggml.o llama.o common.o
+	$(CXX) $(CXXFLAGS) examples/main/main.cpp ggml.o llama.o common.o -o main $(LDFLAGS)
+	@echo
+	@echo '==== Run ./main -h for help. ===='
+	@echo
+
+quantize: examples/quantize/quantize.cpp ggml.o llama.o
+	$(CXX) $(CXXFLAGS) examples/quantize/quantize.cpp ggml.o llama.o -o quantize $(LDFLAGS)
+
+perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o common.o
+	$(CXX) $(CXXFLAGS) examples/perplexity/perplexity.cpp ggml.o llama.o common.o -o perplexity $(LDFLAGS)
+
+embedding: examples/embedding/embedding.cpp ggml.o llama.o common.o
+	$(CXX) $(CXXFLAGS) examples/embedding/embedding.cpp ggml.o llama.o common.o -o embedding $(LDFLAGS)
+
+#
+# Tests
+#
+
+.PHONY: tests
+tests:
+	bash ./tests/run-tests.sh