llama-rb 0.1.0 → 0.2.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: ca78f6c05c53323ba5bd78ccdee815a77c4df10fde7c5497563e48281949cc3e
-  data.tar.gz: 7e225474cc183d2e50f3936d5bee984d394708ff7c48ad040b8b629c6f21fbb9
+  metadata.gz: 1371348a7ba9c4fa75ada41ec8afc6461e1d56dae2c3e3dede175d189ecdd7ea
+  data.tar.gz: b45a9ed3c28a228a2405ec8874f4cf8239dfcb4cb3132e7a44be806b5c6a2a78
 SHA512:
-  metadata.gz: ea82a87539c0511175c6c5afe3c93e6bc5c141ea27bc4af0a4c9c9a8574736de59169bd8d847ca3afd385f27aeb306944f27a4e822233b54f3f47033be92d5ed
-  data.tar.gz: ca92bfd00bea78d88d90c93418a7cf86e9b6a3b436b86f6a7c87cf1906fed59a539085f549a0cdbaa0bf16815c3a221b51ce23313fab8adb6a6310a75fbbe8f5
+  metadata.gz: 88dd6f7a6f971f60753625dce11b469bbf46f606b4be4c8d308636d1f696666cacd9b174bda65bc5e42d503db413c9f1281c9a7129d838f1dfab3088717f603f
+  data.tar.gz: 449673e8950cc869ad899500b85a6108d2a02b7915ca340733bda0f18fa49691df7e839a6efece440d76a0583d037c90a6226f505eacc08ba24a9ae510b840bc
data/Gemfile CHANGED
@@ -1,4 +1,4 @@
-source "https://rubygems.org"
+source 'https://rubygems.org'
 
 gemspec
 
data/Gemfile.lock CHANGED
@@ -1,8 +1,7 @@
 PATH
   remote: .
   specs:
-    llama-rb (0.1.0)
-      rice (~> 4.0.4)
+    llama-rb (0.2.0)
 
 GEM
   remote: https://rubygems.org/
@@ -26,7 +25,6 @@ GEM
     rake (13.0.6)
     regexp_parser (2.7.0)
     rexml (3.2.5)
-    rice (4.0.4)
     rspec (3.12.0)
       rspec-core (~> 3.12.0)
       rspec-expectations (~> 3.12.0)
data/bin/console ADDED
@@ -0,0 +1,7 @@
+#!/usr/bin/env ruby
+
+require 'bundler/setup'
+require 'llama'
+
+require 'irb'
+IRB.start(__FILE__)
data/ext/Makefile ADDED
@@ -0,0 +1,4 @@
+# dummy file to make gem installer happy
+all:
+clean:
+install:
data/ext/extconf.rb ADDED
@@ -0,0 +1,10 @@
+require 'fileutils'
+
+root = File.dirname(__FILE__)
+llama_root = File.join(root, '..', 'llama.cpp')
+
+main = File.join(root, '..', 'bin', 'llama')
+llama_main = File.join(llama_root, 'main')
+
+Dir.chdir(llama_root) { system('make main', exception: true) }
+FileUtils.cp(llama_main, main)
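The two files above replace the old rice-based C extension: at install time RubyGems runs ext/extconf.rb, which compiles llama.cpp's main binary and copies it to bin/llama, while the no-op targets in ext/Makefile satisfy the make/make install steps RubyGems runs afterwards. A minimal sketch of reproducing that build by hand from a checkout (paths assume the repository layout shown in this diff):

# rebuild.rb -- equivalent of what ext/extconf.rb does at gem install time
require 'fileutils'

Dir.chdir('llama.cpp') do
  # compile the llama.cpp CLI; raises if make exits non-zero
  system('make main', exception: true)
end

# place the compiled binary where Llama::Model#default_binary expects it
FileUtils.cp(File.join('llama.cpp', 'main'), File.join('bin', 'llama'))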
data/lib/llama/model.rb CHANGED
@@ -1,86 +1,58 @@
-require 'tempfile'
+require 'open3'
+require 'shellwords'
 
 module Llama
   class Model
-    # move methods defined in `model.cpp` from public to private
-    private :initialize_cpp, :predict_cpp
+    class ModelError < StandardError
+    end
 
-    # rubocop:disable Metrics/MethodLength
-    def self.new(
-      model, # path to model file, e.g. "models/7B/ggml-model-q4_0.bin"
-      n_ctx: 512, # context size
-      n_parts: -1, # amount of model parts (-1 = determine from model dimensions)
-      seed: Time.now.to_i, # RNG seed
-      memory_f16: true, # use f16 instead of f32 for memory kv
-      use_mlock: false # use mlock to keep model in memory
+    def initialize(
+      model,
+      seed: Time.now.to_i,
+      n_predict: 128,
+      binary: default_binary
     )
-      instance = allocate
-
-      instance.instance_eval do
-        initialize
-
-        @model = model
-        @n_ctx = n_ctx
-        @n_parts = n_parts
-        @seed = seed
-        @memory_f16 = memory_f16
-        @use_mlock = use_mlock
-
-        capture_stderr do
-          initialize_cpp(
-            model,
-            n_ctx,
-            n_parts,
-            seed,
-            memory_f16,
-            use_mlock,
-          )
-        end
-      end
-
-      instance
+      @model = model
+      @seed = seed
+      @n_predict = n_predict
+      @binary = binary
     end
-    # rubocop:enable Metrics/MethodLength
 
-    def predict(
-      prompt, # string used as prompt
-      n_predict: 128 # number of tokens to predict
-    )
-      text = ''
+    def predict(prompt)
+      stdout, @stderr, @status = Open3.capture3(command(prompt))
 
-      capture_stderr { text = predict_cpp(prompt, n_predict) }
+      raise ModelError, "Error #{status.to_i}" unless status.success?
 
-      process_text(text)
+      # remove the space that is added as a tokenizer hack in examples/main/main.cpp
+      stdout[0] = ''
+      stdout
     end
 
-    attr_reader :model, :n_ctx, :n_parts, :seed, :memory_f16, :use_mlock, :stderr
+    attr_reader :model, :seed, :n_predict, :binary
 
     private
 
-    def capture_stderr
-      previous = $stderr.dup
-      tmp = Tempfile.open('llama-rb-stderr')
-
-      begin
-        $stderr.reopen(tmp)
+    attr_reader :stderr, :status
 
-        yield
-
-        tmp.rewind
-        @stderr = tmp.read
-      ensure
-        tmp.close(true)
-        $stderr.reopen(previous)
-      end
+    def default_binary
+      File.join(File.dirname(__FILE__), '..', '..', 'bin', 'llama')
     end
 
-    def process_text(text)
-      text = text.force_encoding(Encoding.default_external)
+    def command(prompt)
+      escape_command(binary,
+                     model: model,
+                     prompt: prompt,
+                     seed: seed,
+                     n_predict: n_predict)
+    end
 
-      # remove the space that was added as a tokenizer hack in model.cpp
-      text[0] = '' if text.size.positive?
+    def escape_command(command, **flags)
+      flags_string = flags.map do |key, value|
+        "--#{Shellwords.escape(key)} #{Shellwords.escape(value)}"
+      end.join(' ')
+      command_string = Shellwords.escape(command)
 
-      text
+      "#{command_string} #{flags_string}"
     end
   end
 end
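The reworked class above shells out to the compiled llama binary via Open3.capture3 instead of binding C++ through rice, and returns the binary's standard output with the leading tokenizer space stripped. A minimal usage sketch of the 0.2.0 API as shown in this diff (the weights path is illustrative):

require 'llama'

model = Llama::Model.new(
  'models/7B/ggml-model-q4_0.bin', # illustrative path to a ggml model file
  seed: 42,                        # fix the RNG seed for repeatable output
  n_predict: 16                    # number of tokens to generate
)

begin
  puts model.predict('The capital of France is')
rescue Llama::Model::ModelError => e
  # raised when the llama binary exits with a non-zero status
  warn e.message
end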
data/lib/llama/version.rb CHANGED
@@ -1,3 +1,3 @@
 module Llama
-  VERSION = '0.1.0'.freeze
+  VERSION = '0.2.0'.freeze
 end
data/lib/llama.rb CHANGED
@@ -1,5 +1,4 @@
 require_relative 'llama/version'
-require_relative '../ext/llama/model'
 require_relative 'llama/model'
 
 module Llama
data/llama-rb.gemspec CHANGED
@@ -5,7 +5,7 @@ Gem::Specification.new do |spec|
   spec.version = Llama::VERSION
   spec.licenses = ['MIT']
   spec.authors = ['zfletch']
-  spec.email = ['zfletch2@gmail.com']
+  spec.email = ['zf.rubygems@gmail.com']
 
   spec.summary = 'Ruby interface for Llama'
   spec.description = 'ggerganov/llama.cpp with Ruby hooks'
@@ -16,35 +16,35 @@ Gem::Specification.new do |spec|
   spec.metadata['source_code_uri'] = spec.homepage
   spec.metadata['changelog_uri'] = "#{spec.homepage}/releases"
 
-  # Specify which files should be added to the gem when it is released.
-  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
   spec.files = [
-    "Gemfile",
-    "Gemfile.lock",
-    "LICENSE",
-    "README.md",
-    "Rakefile",
-    "ext/llama/common.cpp",
-    "ext/llama/common.h",
-    "ext/llama/extconf.rb",
-    "ext/llama/ggml.c",
-    "ext/llama/ggml.h",
-    "ext/llama/llama.cpp",
-    "ext/llama/llama.h",
-    "ext/llama/model.cpp",
-    "lib/llama.rb",
-    "lib/llama/model.rb",
-    "lib/llama/version.rb",
-    "llama-rb.gemspec",
-    "llama.cpp",
-    "models/.gitkeep",
+    'Gemfile',
+    'Gemfile.lock',
+    'LICENSE',
+    'README.md',
+    'Rakefile',
+    'bin/console',
+    'ext/extconf.rb',
+    'ext/Makefile',
+    'lib/llama.rb',
+    'lib/llama/model.rb',
+    'lib/llama/version.rb',
+    'llama-rb.gemspec',
+    'llama.cpp/LICENSE',
+    'llama.cpp/Makefile',
+    'llama.cpp/README.md',
+    'llama.cpp/examples/common.cpp',
+    'llama.cpp/examples/common.h',
+    'llama.cpp/examples/main/main.cpp',
+    'llama.cpp/ggml.c',
+    'llama.cpp/ggml.h',
+    'llama.cpp/llama.cpp',
+    'llama.cpp/llama.h',
+    'models/.gitkeep',
   ]
   spec.bindir = 'exe'
   spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
   spec.require_paths = ['lib']
 
-  spec.add_dependency 'rice', '~> 4.0.4'
-
-  spec.extensions = %w[ext/llama/extconf.rb]
+  spec.extensions = %w[ext/extconf.rb]
   spec.metadata['rubygems_mfa_required'] = 'true'
 end
data/llama.cpp/LICENSE ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 Georgi Gerganov
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
data/llama.cpp/Makefile ADDED
@@ -0,0 +1,175 @@
+ifndef UNAME_S
+UNAME_S := $(shell uname -s)
+endif
+
+ifndef UNAME_P
+UNAME_P := $(shell uname -p)
+endif
+
+ifndef UNAME_M
+UNAME_M := $(shell uname -m)
+endif
+
+CCV := $(shell $(CC) --version | head -n 1)
+CXXV := $(shell $(CXX) --version | head -n 1)
+
+# Mac OS + Arm can report x86_64
+# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
+ifeq ($(UNAME_S),Darwin)
+	ifneq ($(UNAME_P),arm)
+		SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
+		ifeq ($(SYSCTL_M),1)
+			# UNAME_P := arm
+			# UNAME_M := arm64
+			warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
+		endif
+	endif
+endif
+
+#
+# Compile flags
+#
+
+# keep standard at C11 and C++11
+CFLAGS = -I. -O3 -DNDEBUG -std=c11 -fPIC
+CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC
+LDFLAGS =
+
+# warnings
+CFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith -Wno-unused-function
+CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
+
+# OS specific
+# TODO: support Windows
+ifeq ($(UNAME_S),Linux)
+	CFLAGS += -pthread
+	CXXFLAGS += -pthread
+endif
+ifeq ($(UNAME_S),Darwin)
+	CFLAGS += -pthread
+	CXXFLAGS += -pthread
+endif
+ifeq ($(UNAME_S),FreeBSD)
+	CFLAGS += -pthread
+	CXXFLAGS += -pthread
+endif
+ifeq ($(UNAME_S),NetBSD)
+	CFLAGS += -pthread
+	CXXFLAGS += -pthread
+endif
+ifeq ($(UNAME_S),OpenBSD)
+	CFLAGS += -pthread
+	CXXFLAGS += -pthread
+endif
+ifeq ($(UNAME_S),Haiku)
+	CFLAGS += -pthread
+	CXXFLAGS += -pthread
+endif
+
+# Architecture specific
+# TODO: probably these flags need to be tweaked on some architectures
+# feel free to update the Makefile for your architecture and send a pull request or issue
+ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))
+	# Use all CPU extensions that are available:
+	CFLAGS += -march=native -mtune=native
+	CXXFLAGS += -march=native -mtune=native
+endif
+ifneq ($(filter ppc64%,$(UNAME_M)),)
+	POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
+	ifneq (,$(findstring POWER9,$(POWER9_M)))
+		CFLAGS += -mcpu=power9
+		CXXFLAGS += -mcpu=power9
+	endif
+	# Require c++23's std::byteswap for big-endian support.
+	ifeq ($(UNAME_M),ppc64)
+		CXXFLAGS += -std=c++23 -DGGML_BIG_ENDIAN
+	endif
+endif
+ifndef LLAMA_NO_ACCELERATE
+	# Mac M1 - include Accelerate framework.
+	# `-framework Accelerate` works on Mac Intel as well, with negliable performance boost (as of the predict time).
+	ifeq ($(UNAME_S),Darwin)
+		CFLAGS += -DGGML_USE_ACCELERATE
+		LDFLAGS += -framework Accelerate
+	endif
+endif
+ifdef LLAMA_OPENBLAS
+	CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
+	LDFLAGS += -lopenblas
+endif
+ifdef LLAMA_GPROF
+	CFLAGS += -pg
+	CXXFLAGS += -pg
+endif
+ifneq ($(filter aarch64%,$(UNAME_M)),)
+	CFLAGS += -mcpu=native
+	CXXFLAGS += -mcpu=native
+endif
+ifneq ($(filter armv6%,$(UNAME_M)),)
+	# Raspberry Pi 1, 2, 3
+	CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
+endif
+ifneq ($(filter armv7%,$(UNAME_M)),)
+	# Raspberry Pi 4
+	CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
+endif
+ifneq ($(filter armv8%,$(UNAME_M)),)
+	# Raspberry Pi 4
+	CFLAGS += -mfp16-format=ieee -mno-unaligned-access
+endif
+
+#
+# Print build information
+#
+
+$(info I llama.cpp build info: )
+$(info I UNAME_S: $(UNAME_S))
+$(info I UNAME_P: $(UNAME_P))
+$(info I UNAME_M: $(UNAME_M))
+$(info I CFLAGS: $(CFLAGS))
+$(info I CXXFLAGS: $(CXXFLAGS))
+$(info I LDFLAGS: $(LDFLAGS))
+$(info I CC: $(CCV))
+$(info I CXX: $(CXXV))
+$(info )
+
+default: main quantize perplexity embedding
+
+#
+# Build library
+#
+
+ggml.o: ggml.c ggml.h
+	$(CC) $(CFLAGS) -c ggml.c -o ggml.o
+
+llama.o: llama.cpp llama.h
+	$(CXX) $(CXXFLAGS) -c llama.cpp -o llama.o
+
+common.o: examples/common.cpp examples/common.h
+	$(CXX) $(CXXFLAGS) -c examples/common.cpp -o common.o
+
+clean:
+	rm -vf *.o main quantize perplexity embedding
+
+main: examples/main/main.cpp ggml.o llama.o common.o
+	$(CXX) $(CXXFLAGS) examples/main/main.cpp ggml.o llama.o common.o -o main $(LDFLAGS)
+	@echo
+	@echo '==== Run ./main -h for help. ===='
+	@echo
+
+quantize: examples/quantize/quantize.cpp ggml.o llama.o
+	$(CXX) $(CXXFLAGS) examples/quantize/quantize.cpp ggml.o llama.o -o quantize $(LDFLAGS)
+
+perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o common.o
+	$(CXX) $(CXXFLAGS) examples/perplexity/perplexity.cpp ggml.o llama.o common.o -o perplexity $(LDFLAGS)
+
+embedding: examples/embedding/embedding.cpp ggml.o llama.o common.o
+	$(CXX) $(CXXFLAGS) examples/embedding/embedding.cpp ggml.o llama.o common.o -o embedding $(LDFLAGS)
+
+#
+# Tests
+#
+
+.PHONY: tests
+tests:
+	bash ./tests/run-tests.sh
+ bash ./tests/run-tests.sh