llama-rb 0.1.0 → 0.2.1

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: ca78f6c05c53323ba5bd78ccdee815a77c4df10fde7c5497563e48281949cc3e
-  data.tar.gz: 7e225474cc183d2e50f3936d5bee984d394708ff7c48ad040b8b629c6f21fbb9
+  metadata.gz: '03801e4f99933be9c0e8d559008626991535c2167af88c8cb31defb31c88d0f6'
+  data.tar.gz: 6f17e50818de906f33de2686cf1b75c0e17aa052f0fba60889bad85df0591f59
 SHA512:
-  metadata.gz: ea82a87539c0511175c6c5afe3c93e6bc5c141ea27bc4af0a4c9c9a8574736de59169bd8d847ca3afd385f27aeb306944f27a4e822233b54f3f47033be92d5ed
-  data.tar.gz: ca92bfd00bea78d88d90c93418a7cf86e9b6a3b436b86f6a7c87cf1906fed59a539085f549a0cdbaa0bf16815c3a221b51ce23313fab8adb6a6310a75fbbe8f5
+  metadata.gz: 40602fc8c253087a78fd4e5edf5fbae24f3a4ad0d9a3bb2f6730ef701753f6815e8716303220e8edcb1984484d5ffbd20c6adb7e07690244cd738ec6918c80e8
+  data.tar.gz: 9cbf6bed4fa4359bd007d083f99976a885b1557b0bf01c4d22a55e231515adf7f66e58e951e01bf731e827b893bf6fc278a306f8a566be3e133039f210214bc2
data/Gemfile CHANGED
@@ -1,4 +1,4 @@
-source "https://rubygems.org"
+source 'https://rubygems.org'

 gemspec

data/Gemfile.lock CHANGED
@@ -1,8 +1,7 @@
 PATH
   remote: .
   specs:
-    llama-rb (0.1.0)
-      rice (~> 4.0.4)
+    llama-rb (0.2.0)

 GEM
   remote: https://rubygems.org/
@@ -26,7 +25,6 @@ GEM
     rake (13.0.6)
     regexp_parser (2.7.0)
     rexml (3.2.5)
-    rice (4.0.4)
     rspec (3.12.0)
       rspec-core (~> 3.12.0)
       rspec-expectations (~> 3.12.0)
data/README.md CHANGED
@@ -42,21 +42,15 @@ m.predict('hello world')
 ```ruby
 def self.new(
   model, # path to model file, e.g. "models/7B/ggml-model-q4_0.bin"
-  n_ctx: 512, # context size
-  n_parts: -1, # amount of model parts (-1 = determine from model dimensions)
+  n_predict: 128, # number of tokens to predict
   seed: Time.now.to_i, # RNG seed
-  memory_f16: true, # use f16 instead of f32 for memory kv
-  use_mlock: false # use mlock to keep model in memory
 )
 ```

 #### Llama::Model#predict

 ```ruby
-def predict(
-  prompt, # string used as prompt
-  n_predict: 128 # number of tokens to predict
-)
+def predict(prompt)
 ```

 ## Development
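Taken together, the README changes above describe the simplified 0.2.x API: context size, model parts, and memory options are gone, and the prediction length moves from `#predict` to the constructor. A minimal usage sketch of that documented API (the model path and seed values are illustrative):

```ruby
require 'llama'

# n_predict and seed are now constructor options; #predict only takes the prompt.
model = Llama::Model.new(
  'models/7B/ggml-model-q4_0.bin', # illustrative path to a quantized model
  seed: 42,
  n_predict: 128
)

puts model.predict('hello world')
```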
data/bin/console ADDED
@@ -0,0 +1,7 @@
+#!/usr/bin/env ruby
+
+require 'bundler/setup'
+require 'llama'
+
+require 'irb'
+IRB.start(__FILE__)
data/ext/Makefile ADDED
@@ -0,0 +1,4 @@
+# dummy file to make gem installer happy
+all:
+clean:
+install:
data/ext/extconf.rb ADDED
@@ -0,0 +1,10 @@
+require 'fileutils'
+
+root = File.dirname(__FILE__)
+llama_root = File.join(root, '..', 'llama.cpp')
+
+main = File.join(root, '..', 'bin', 'llama')
+llama_main = File.join(llama_root, 'main')
+
+Dir.chdir(llama_root) { system('make main', exception: true) }
+FileUtils.cp(llama_main, main)
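The two added files above replace the old Rice-based native extension: at install time RubyGems runs ext/extconf.rb, which builds the vendored llama.cpp `main` target and copies the binary to bin/llama, while the dummy ext/Makefile keeps the post-extconf `make` step a no-op. A rough sketch of the equivalent manual steps, assuming a checkout of the gem and a working C/C++ toolchain:

```ruby
require 'fileutils'

# Roughly what ext/extconf.rb does, run from the gem root (paths assumed).
Dir.chdir('llama.cpp') { system('make main', exception: true) }
FileUtils.cp('llama.cpp/main', 'bin/llama')
```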
data/lib/llama/model.rb CHANGED
@@ -1,86 +1,58 @@
-require 'tempfile'
+require 'open3'
+require 'shellwords'

 module Llama
   class Model
-    # move methods defined in `model.cpp` from public to private
-    private :initialize_cpp, :predict_cpp
+    class ModelError < StandardError
+    end

-    # rubocop:disable Metrics/MethodLength
-    def self.new(
-      model, # path to model file, e.g. "models/7B/ggml-model-q4_0.bin"
-      n_ctx: 512, # context size
-      n_parts: -1, # amount of model parts (-1 = determine from model dimensions)
-      seed: Time.now.to_i, # RNG seed
-      memory_f16: true, # use f16 instead of f32 for memory kv
-      use_mlock: false # use mlock to keep model in memory
+    def initialize(
+      model,
+      seed: Time.now.to_i,
+      n_predict: 128,
+      binary: default_binary
     )
-      instance = allocate
-
-      instance.instance_eval do
-        initialize
-
-        @model = model
-        @n_ctx = n_ctx
-        @n_parts = n_parts
-        @seed = seed
-        @memory_f16 = memory_f16
-        @use_mlock = use_mlock
-
-        capture_stderr do
-          initialize_cpp(
-            model,
-            n_ctx,
-            n_parts,
-            seed,
-            memory_f16,
-            use_mlock,
-          )
-        end
-      end
-
-      instance
+      @model = model
+      @seed = seed
+      @n_predict = n_predict
+      @binary = binary
     end
-    # rubocop:enable Metrics/MethodLength

-    def predict(
-      prompt, # string used as prompt
-      n_predict: 128 # number of tokens to predict
-    )
-      text = ''
+    def predict(prompt)
+      stdout, @stderr, @status = Open3.capture3(command(prompt))

-      capture_stderr { text = predict_cpp(prompt, n_predict) }
+      raise ModelError, "Error #{status.to_i}" unless status.success?

-      process_text(text)
+      # remove the space that is added as a tokenizer hack in examples/main/main.cpp
+      stdout[0] = ''
+      stdout
     end

-    attr_reader :model, :n_ctx, :n_parts, :seed, :memory_f16, :use_mlock, :stderr
+    attr_reader :model, :seed, :n_predict, :binary

     private

-    def capture_stderr
-      previous = $stderr.dup
-      tmp = Tempfile.open('llama-rb-stderr')
-
-      begin
-        $stderr.reopen(tmp)
+    attr_reader :stderr, :status

-        yield
-
-        tmp.rewind
-        @stderr = tmp.read
-      ensure
-        tmp.close(true)
-        $stderr.reopen(previous)
-      end
+    def default_binary
+      File.join(File.dirname(__FILE__), '..', '..', 'bin', 'llama')
     end

-    def process_text(text)
-      text = text.force_encoding(Encoding.default_external)
+    def command(prompt)
+      escape_command(binary,
+                     model: model,
+                     prompt: prompt,
+                     seed: seed,
+                     n_predict: n_predict)
+    end

-      # remove the space that was added as a tokenizer hack in model.cpp
-      text[0] = '' if text.size.positive?
+    def escape_command(command, **flags)
+      flags_string = flags.map do |key, value|
+        "--#{Shellwords.escape(key)} #{Shellwords.escape(value)}"
+      end.join(' ')
+      command_string = Shellwords.escape(command)

-      text
+      "#{command_string} #{flags_string}"
     end
   end
 end
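The rewritten Llama::Model no longer binds C++ through Rice; `#predict` shells out to the bundled binary via Open3.capture3 and builds its argument list with Shellwords. A rough sketch of the command string the new `command`/`escape_command` pair produces (binary path, model path, and prompt are illustrative):

```ruby
require 'shellwords'

# Mirrors escape_command from the diff above: each keyword becomes a
# shell-escaped "--flag value" pair appended to the escaped binary path.
flags = { model: 'models/7B/ggml-model-q4_0.bin', prompt: 'hello world', seed: 42, n_predict: 128 }
flags_string = flags.map { |k, v| "--#{Shellwords.escape(k)} #{Shellwords.escape(v)}" }.join(' ')

puts "#{Shellwords.escape('bin/llama')} #{flags_string}"
# => bin/llama --model models/7B/ggml-model-q4_0.bin --prompt hello\ world --seed 42 --n_predict 128
```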
data/lib/llama/version.rb CHANGED
@@ -1,3 +1,3 @@
 module Llama
-  VERSION = '0.1.0'.freeze
+  VERSION = '0.2.1'.freeze
 end
data/lib/llama.rb CHANGED
@@ -1,5 +1,4 @@
 require_relative 'llama/version'
-require_relative '../ext/llama/model'
 require_relative 'llama/model'

 module Llama
data/llama-rb.gemspec CHANGED
@@ -5,7 +5,7 @@ Gem::Specification.new do |spec|
   spec.version = Llama::VERSION
   spec.licenses = ['MIT']
   spec.authors = ['zfletch']
-  spec.email = ['zfletch2@gmail.com']
+  spec.email = ['zf.rubygems@gmail.com']

   spec.summary = 'Ruby interface for Llama'
   spec.description = 'ggerganov/llama.cpp with Ruby hooks'
@@ -16,35 +16,35 @@ Gem::Specification.new do |spec|
   spec.metadata['source_code_uri'] = spec.homepage
   spec.metadata['changelog_uri'] = "#{spec.homepage}/releases"

-  # Specify which files should be added to the gem when it is released.
-  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
   spec.files = [
-    "Gemfile",
-    "Gemfile.lock",
-    "LICENSE",
-    "README.md",
-    "Rakefile",
-    "ext/llama/common.cpp",
-    "ext/llama/common.h",
-    "ext/llama/extconf.rb",
-    "ext/llama/ggml.c",
-    "ext/llama/ggml.h",
-    "ext/llama/llama.cpp",
-    "ext/llama/llama.h",
-    "ext/llama/model.cpp",
-    "lib/llama.rb",
-    "lib/llama/model.rb",
-    "lib/llama/version.rb",
-    "llama-rb.gemspec",
-    "llama.cpp",
-    "models/.gitkeep",
+    'Gemfile',
+    'Gemfile.lock',
+    'LICENSE',
+    'README.md',
+    'Rakefile',
+    'bin/console',
+    'ext/extconf.rb',
+    'ext/Makefile',
+    'lib/llama.rb',
+    'lib/llama/model.rb',
+    'lib/llama/version.rb',
+    'llama-rb.gemspec',
+    'llama.cpp/LICENSE',
+    'llama.cpp/Makefile',
+    'llama.cpp/README.md',
+    'llama.cpp/examples/common.cpp',
+    'llama.cpp/examples/common.h',
+    'llama.cpp/examples/main/main.cpp',
+    'llama.cpp/ggml.c',
+    'llama.cpp/ggml.h',
+    'llama.cpp/llama.cpp',
+    'llama.cpp/llama.h',
+    'models/.gitkeep',
   ]
   spec.bindir = 'exe'
   spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
   spec.require_paths = ['lib']

-  spec.add_dependency 'rice', '~> 4.0.4'
-
-  spec.extensions = %w[ext/llama/extconf.rb]
+  spec.extensions = %w[ext/extconf.rb]
   spec.metadata['rubygems_mfa_required'] = 'true'
 end
data/llama.cpp/LICENSE ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 Georgi Gerganov
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
data/llama.cpp/Makefile ADDED
@@ -0,0 +1,175 @@
+ifndef UNAME_S
+	UNAME_S := $(shell uname -s)
+endif
+
+ifndef UNAME_P
+	UNAME_P := $(shell uname -p)
+endif
+
+ifndef UNAME_M
+	UNAME_M := $(shell uname -m)
+endif
+
+CCV := $(shell $(CC) --version | head -n 1)
+CXXV := $(shell $(CXX) --version | head -n 1)
+
+# Mac OS + Arm can report x86_64
+# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
+ifeq ($(UNAME_S),Darwin)
+	ifneq ($(UNAME_P),arm)
+		SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
+		ifeq ($(SYSCTL_M),1)
+			# UNAME_P := arm
+			# UNAME_M := arm64
+			warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
+		endif
+	endif
+endif
+
+#
+# Compile flags
+#
+
+# keep standard at C11 and C++11
+CFLAGS = -I. -O3 -DNDEBUG -std=c11 -fPIC
+CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC
+LDFLAGS =
+
+# warnings
+CFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith -Wno-unused-function
+CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
+
+# OS specific
+# TODO: support Windows
+ifeq ($(UNAME_S),Linux)
+	CFLAGS += -pthread
+	CXXFLAGS += -pthread
+endif
+ifeq ($(UNAME_S),Darwin)
+	CFLAGS += -pthread
+	CXXFLAGS += -pthread
+endif
+ifeq ($(UNAME_S),FreeBSD)
+	CFLAGS += -pthread
+	CXXFLAGS += -pthread
+endif
+ifeq ($(UNAME_S),NetBSD)
+	CFLAGS += -pthread
+	CXXFLAGS += -pthread
+endif
+ifeq ($(UNAME_S),OpenBSD)
+	CFLAGS += -pthread
+	CXXFLAGS += -pthread
+endif
+ifeq ($(UNAME_S),Haiku)
+	CFLAGS += -pthread
+	CXXFLAGS += -pthread
+endif
+
+# Architecture specific
+# TODO: probably these flags need to be tweaked on some architectures
+#       feel free to update the Makefile for your architecture and send a pull request or issue
+ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))
+	# Use all CPU extensions that are available:
+	CFLAGS += -march=native -mtune=native
+	CXXFLAGS += -march=native -mtune=native
+endif
+ifneq ($(filter ppc64%,$(UNAME_M)),)
+	POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
+	ifneq (,$(findstring POWER9,$(POWER9_M)))
+		CFLAGS += -mcpu=power9
+		CXXFLAGS += -mcpu=power9
+	endif
+	# Require c++23's std::byteswap for big-endian support.
+	ifeq ($(UNAME_M),ppc64)
+		CXXFLAGS += -std=c++23 -DGGML_BIG_ENDIAN
+	endif
+endif
+ifndef LLAMA_NO_ACCELERATE
+	# Mac M1 - include Accelerate framework.
+	# `-framework Accelerate` works on Mac Intel as well, with negliable performance boost (as of the predict time).
+	ifeq ($(UNAME_S),Darwin)
+		CFLAGS += -DGGML_USE_ACCELERATE
+		LDFLAGS += -framework Accelerate
+	endif
+endif
+ifdef LLAMA_OPENBLAS
+	CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
+	LDFLAGS += -lopenblas
+endif
+ifdef LLAMA_GPROF
+	CFLAGS += -pg
+	CXXFLAGS += -pg
+endif
+ifneq ($(filter aarch64%,$(UNAME_M)),)
+	CFLAGS += -mcpu=native
+	CXXFLAGS += -mcpu=native
+endif
+ifneq ($(filter armv6%,$(UNAME_M)),)
+	# Raspberry Pi 1, 2, 3
+	CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
+endif
+ifneq ($(filter armv7%,$(UNAME_M)),)
+	# Raspberry Pi 4
+	CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
+endif
+ifneq ($(filter armv8%,$(UNAME_M)),)
+	# Raspberry Pi 4
+	CFLAGS += -mfp16-format=ieee -mno-unaligned-access
+endif
+
+#
+# Print build information
+#
+
+$(info I llama.cpp build info: )
+$(info I UNAME_S: $(UNAME_S))
+$(info I UNAME_P: $(UNAME_P))
+$(info I UNAME_M: $(UNAME_M))
+$(info I CFLAGS: $(CFLAGS))
+$(info I CXXFLAGS: $(CXXFLAGS))
+$(info I LDFLAGS: $(LDFLAGS))
+$(info I CC: $(CCV))
+$(info I CXX: $(CXXV))
+$(info )
+
+default: main quantize perplexity embedding
+
+#
+# Build library
+#
+
+ggml.o: ggml.c ggml.h
+	$(CC) $(CFLAGS) -c ggml.c -o ggml.o
+
+llama.o: llama.cpp llama.h
+	$(CXX) $(CXXFLAGS) -c llama.cpp -o llama.o
+
+common.o: examples/common.cpp examples/common.h
+	$(CXX) $(CXXFLAGS) -c examples/common.cpp -o common.o
+
+clean:
+	rm -vf *.o main quantize perplexity embedding
+
+main: examples/main/main.cpp ggml.o llama.o common.o
+	$(CXX) $(CXXFLAGS) examples/main/main.cpp ggml.o llama.o common.o -o main $(LDFLAGS)
+	@echo
+	@echo '==== Run ./main -h for help. ===='
+	@echo
+
+quantize: examples/quantize/quantize.cpp ggml.o llama.o
+	$(CXX) $(CXXFLAGS) examples/quantize/quantize.cpp ggml.o llama.o -o quantize $(LDFLAGS)
+
+perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o common.o
+	$(CXX) $(CXXFLAGS) examples/perplexity/perplexity.cpp ggml.o llama.o common.o -o perplexity $(LDFLAGS)
+
+embedding: examples/embedding/embedding.cpp ggml.o llama.o common.o
+	$(CXX) $(CXXFLAGS) examples/embedding/embedding.cpp ggml.o llama.o common.o -o embedding $(LDFLAGS)
+
+#
+# Tests
+#
+
+.PHONY: tests
+tests:
+	bash ./tests/run-tests.sh