llama-rb 0.1.0 → 0.2.1

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: ca78f6c05c53323ba5bd78ccdee815a77c4df10fde7c5497563e48281949cc3e
-  data.tar.gz: 7e225474cc183d2e50f3936d5bee984d394708ff7c48ad040b8b629c6f21fbb9
+  metadata.gz: '03801e4f99933be9c0e8d559008626991535c2167af88c8cb31defb31c88d0f6'
+  data.tar.gz: 6f17e50818de906f33de2686cf1b75c0e17aa052f0fba60889bad85df0591f59
 SHA512:
-  metadata.gz: ea82a87539c0511175c6c5afe3c93e6bc5c141ea27bc4af0a4c9c9a8574736de59169bd8d847ca3afd385f27aeb306944f27a4e822233b54f3f47033be92d5ed
-  data.tar.gz: ca92bfd00bea78d88d90c93418a7cf86e9b6a3b436b86f6a7c87cf1906fed59a539085f549a0cdbaa0bf16815c3a221b51ce23313fab8adb6a6310a75fbbe8f5
+  metadata.gz: 40602fc8c253087a78fd4e5edf5fbae24f3a4ad0d9a3bb2f6730ef701753f6815e8716303220e8edcb1984484d5ffbd20c6adb7e07690244cd738ec6918c80e8
+  data.tar.gz: 9cbf6bed4fa4359bd007d083f99976a885b1557b0bf01c4d22a55e231515adf7f66e58e951e01bf731e827b893bf6fc278a306f8a566be3e133039f210214bc2
data/Gemfile CHANGED
@@ -1,4 +1,4 @@
-source "https://rubygems.org"
+source 'https://rubygems.org'

 gemspec

data/Gemfile.lock CHANGED
@@ -1,8 +1,7 @@
 PATH
   remote: .
   specs:
-    llama-rb (0.1.0)
-      rice (~> 4.0.4)
+    llama-rb (0.2.0)

 GEM
   remote: https://rubygems.org/
@@ -26,7 +25,6 @@ GEM
     rake (13.0.6)
     regexp_parser (2.7.0)
     rexml (3.2.5)
-    rice (4.0.4)
     rspec (3.12.0)
       rspec-core (~> 3.12.0)
       rspec-expectations (~> 3.12.0)
data/README.md CHANGED
@@ -42,21 +42,15 @@ m.predict('hello world')
 ```ruby
 def self.new(
   model, # path to model file, e.g. "models/7B/ggml-model-q4_0.bin"
-  n_ctx: 512, # context size
-  n_parts: -1, # amount of model parts (-1 = determine from model dimensions)
+  n_predict: 128, # number of tokens to predict
   seed: Time.now.to_i, # RNG seed
-  memory_f16: true, # use f16 instead of f32 for memory kv
-  use_mlock: false # use mlock to keep model in memory
 )
 ```

 #### Llama::Model#predict

 ```ruby
-def predict(
-  prompt, # string used as prompt
-  n_predict: 128 # number of tokens to predict
-)
+def predict(prompt)
 ```

 ## Development
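Taken together, the README changes above describe the simplified 0.2.x API: context size, model parts, and memory options are gone, and the prediction length moves from `#predict` to the constructor. A minimal usage sketch of that documented API (the model path and seed values are illustrative):

```ruby
require 'llama'

# n_predict and seed are now constructor options; #predict only takes the prompt.
model = Llama::Model.new(
  'models/7B/ggml-model-q4_0.bin', # illustrative path to a quantized model
  seed: 42,
  n_predict: 128
)

puts model.predict('hello world')
```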
data/bin/console ADDED
@@ -0,0 +1,7 @@
+#!/usr/bin/env ruby
+
+require 'bundler/setup'
+require 'llama'
+
+require 'irb'
+IRB.start(__FILE__)
data/ext/Makefile ADDED
@@ -0,0 +1,4 @@
+# dummy file to make gem installer happy
+all:
+clean:
+install:
data/ext/extconf.rb ADDED
@@ -0,0 +1,10 @@
+require 'fileutils'
+
+root = File.dirname(__FILE__)
+llama_root = File.join(root, '..', 'llama.cpp')
+
+main = File.join(root, '..', 'bin', 'llama')
+llama_main = File.join(llama_root, 'main')
+
+Dir.chdir(llama_root) { system('make main', exception: true) }
+FileUtils.cp(llama_main, main)
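The two added files above replace the old Rice-based native extension: at install time RubyGems runs ext/extconf.rb, which builds the vendored llama.cpp `main` target and copies the binary to bin/llama, while the dummy ext/Makefile keeps the post-extconf `make` step a no-op. A rough sketch of the equivalent manual steps, assuming a checkout of the gem and a working C/C++ toolchain:

```ruby
require 'fileutils'

# Roughly what ext/extconf.rb does, run from the gem root (paths assumed).
Dir.chdir('llama.cpp') { system('make main', exception: true) }
FileUtils.cp('llama.cpp/main', 'bin/llama')
```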
data/lib/llama/model.rb CHANGED
@@ -1,86 +1,58 @@
-require 'tempfile'
+require 'open3'
+require 'shellwords'

 module Llama
   class Model
-    # move methods defined in `model.cpp` from public to private
-    private :initialize_cpp, :predict_cpp
+    class ModelError < StandardError
+    end

-    # rubocop:disable Metrics/MethodLength
-    def self.new(
-      model, # path to model file, e.g. "models/7B/ggml-model-q4_0.bin"
-      n_ctx: 512, # context size
-      n_parts: -1, # amount of model parts (-1 = determine from model dimensions)
-      seed: Time.now.to_i, # RNG seed
-      memory_f16: true, # use f16 instead of f32 for memory kv
-      use_mlock: false # use mlock to keep model in memory
+    def initialize(
+      model,
+      seed: Time.now.to_i,
+      n_predict: 128,
+      binary: default_binary
     )
-      instance = allocate
-
-      instance.instance_eval do
-        initialize
-
-        @model = model
-        @n_ctx = n_ctx
-        @n_parts = n_parts
-        @seed = seed
-        @memory_f16 = memory_f16
-        @use_mlock = use_mlock
-
-        capture_stderr do
-          initialize_cpp(
-            model,
-            n_ctx,
-            n_parts,
-            seed,
-            memory_f16,
-            use_mlock,
-          )
-        end
-      end
-
-      instance
+      @model = model
+      @seed = seed
+      @n_predict = n_predict
+      @binary = binary
     end
-    # rubocop:enable Metrics/MethodLength

-    def predict(
-      prompt, # string used as prompt
-      n_predict: 128 # number of tokens to predict
-    )
-      text = ''
+    def predict(prompt)
+      stdout, @stderr, @status = Open3.capture3(command(prompt))

-      capture_stderr { text = predict_cpp(prompt, n_predict) }
+      raise ModelError, "Error #{status.to_i}" unless status.success?

-      process_text(text)
+      # remove the space that is added as a tokenizer hack in examples/main/main.cpp
+      stdout[0] = ''
+      stdout
     end

-    attr_reader :model, :n_ctx, :n_parts, :seed, :memory_f16, :use_mlock, :stderr
+    attr_reader :model, :seed, :n_predict, :binary

     private

-    def capture_stderr
-      previous = $stderr.dup
-      tmp = Tempfile.open('llama-rb-stderr')
-
-      begin
-        $stderr.reopen(tmp)
+    attr_reader :stderr, :status

-        yield
-
-        tmp.rewind
-        @stderr = tmp.read
-      ensure
-        tmp.close(true)
-        $stderr.reopen(previous)
-      end
+    def default_binary
+      File.join(File.dirname(__FILE__), '..', '..', 'bin', 'llama')
     end

-    def process_text(text)
-      text = text.force_encoding(Encoding.default_external)
+    def command(prompt)
+      escape_command(binary,
+                     model: model,
+                     prompt: prompt,
+                     seed: seed,
+                     n_predict: n_predict)
+    end

-      # remove the space that was added as a tokenizer hack in model.cpp
-      text[0] = '' if text.size.positive?
+    def escape_command(command, **flags)
+      flags_string = flags.map do |key, value|
+        "--#{Shellwords.escape(key)} #{Shellwords.escape(value)}"
+      end.join(' ')
+      command_string = Shellwords.escape(command)

-      text
+      "#{command_string} #{flags_string}"
     end
   end
 end
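The rewritten Llama::Model no longer binds C++ through Rice; `#predict` shells out to the bundled binary via Open3.capture3 and builds its argument list with Shellwords. A rough sketch of the command string the new `command`/`escape_command` pair produces (binary path, model path, and prompt are illustrative):

```ruby
require 'shellwords'

# Mirrors escape_command from the diff above: each keyword becomes a
# shell-escaped "--flag value" pair appended to the escaped binary path.
flags = { model: 'models/7B/ggml-model-q4_0.bin', prompt: 'hello world', seed: 42, n_predict: 128 }
flags_string = flags.map { |k, v| "--#{Shellwords.escape(k)} #{Shellwords.escape(v)}" }.join(' ')

puts "#{Shellwords.escape('bin/llama')} #{flags_string}"
# => bin/llama --model models/7B/ggml-model-q4_0.bin --prompt hello\ world --seed 42 --n_predict 128
```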
data/lib/llama/version.rb CHANGED
@@ -1,3 +1,3 @@
 module Llama
-  VERSION = '0.1.0'.freeze
+  VERSION = '0.2.1'.freeze
 end
data/lib/llama.rb CHANGED
@@ -1,5 +1,4 @@
 require_relative 'llama/version'
-require_relative '../ext/llama/model'
 require_relative 'llama/model'

 module Llama
data/llama-rb.gemspec CHANGED
@@ -5,7 +5,7 @@ Gem::Specification.new do |spec|
   spec.version = Llama::VERSION
   spec.licenses = ['MIT']
   spec.authors = ['zfletch']
-  spec.email = ['zfletch2@gmail.com']
+  spec.email = ['zf.rubygems@gmail.com']

   spec.summary = 'Ruby interface for Llama'
   spec.description = 'ggerganov/llama.cpp with Ruby hooks'
@@ -16,35 +16,35 @@ Gem::Specification.new do |spec|
   spec.metadata['source_code_uri'] = spec.homepage
   spec.metadata['changelog_uri'] = "#{spec.homepage}/releases"

-  # Specify which files should be added to the gem when it is released.
-  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
   spec.files = [
-    "Gemfile",
-    "Gemfile.lock",
-    "LICENSE",
-    "README.md",
-    "Rakefile",
-    "ext/llama/common.cpp",
-    "ext/llama/common.h",
-    "ext/llama/extconf.rb",
-    "ext/llama/ggml.c",
-    "ext/llama/ggml.h",
-    "ext/llama/llama.cpp",
-    "ext/llama/llama.h",
-    "ext/llama/model.cpp",
-    "lib/llama.rb",
-    "lib/llama/model.rb",
-    "lib/llama/version.rb",
-    "llama-rb.gemspec",
-    "llama.cpp",
-    "models/.gitkeep",
+    'Gemfile',
+    'Gemfile.lock',
+    'LICENSE',
+    'README.md',
+    'Rakefile',
+    'bin/console',
+    'ext/extconf.rb',
+    'ext/Makefile',
+    'lib/llama.rb',
+    'lib/llama/model.rb',
+    'lib/llama/version.rb',
+    'llama-rb.gemspec',
+    'llama.cpp/LICENSE',
+    'llama.cpp/Makefile',
+    'llama.cpp/README.md',
+    'llama.cpp/examples/common.cpp',
+    'llama.cpp/examples/common.h',
+    'llama.cpp/examples/main/main.cpp',
+    'llama.cpp/ggml.c',
+    'llama.cpp/ggml.h',
+    'llama.cpp/llama.cpp',
+    'llama.cpp/llama.h',
+    'models/.gitkeep',
   ]
   spec.bindir = 'exe'
   spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
   spec.require_paths = ['lib']

-  spec.add_dependency 'rice', '~> 4.0.4'
-
-  spec.extensions = %w[ext/llama/extconf.rb]
+  spec.extensions = %w[ext/extconf.rb]
   spec.metadata['rubygems_mfa_required'] = 'true'
 end
data/llama.cpp/LICENSE ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 Georgi Gerganov
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
data/llama.cpp/Makefile ADDED
@@ -0,0 +1,175 @@
+ifndef UNAME_S
+	UNAME_S := $(shell uname -s)
+endif
+
+ifndef UNAME_P
+	UNAME_P := $(shell uname -p)
+endif
+
+ifndef UNAME_M
+	UNAME_M := $(shell uname -m)
+endif
+
+CCV := $(shell $(CC) --version | head -n 1)
+CXXV := $(shell $(CXX) --version | head -n 1)
+
+# Mac OS + Arm can report x86_64
+# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
+ifeq ($(UNAME_S),Darwin)
+	ifneq ($(UNAME_P),arm)
+		SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
+		ifeq ($(SYSCTL_M),1)
+			# UNAME_P := arm
+			# UNAME_M := arm64
+			warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
+		endif
+	endif
+endif
+
+#
+# Compile flags
+#
+
+# keep standard at C11 and C++11
+CFLAGS = -I. -O3 -DNDEBUG -std=c11 -fPIC
+CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC
+LDFLAGS =
+
+# warnings
+CFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith -Wno-unused-function
+CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
+
+# OS specific
+# TODO: support Windows
+ifeq ($(UNAME_S),Linux)
+	CFLAGS += -pthread
+	CXXFLAGS += -pthread
+endif
+ifeq ($(UNAME_S),Darwin)
+	CFLAGS += -pthread
+	CXXFLAGS += -pthread
+endif
+ifeq ($(UNAME_S),FreeBSD)
+	CFLAGS += -pthread
+	CXXFLAGS += -pthread
+endif
+ifeq ($(UNAME_S),NetBSD)
+	CFLAGS += -pthread
+	CXXFLAGS += -pthread
+endif
+ifeq ($(UNAME_S),OpenBSD)
+	CFLAGS += -pthread
+	CXXFLAGS += -pthread
+endif
+ifeq ($(UNAME_S),Haiku)
+	CFLAGS += -pthread
+	CXXFLAGS += -pthread
+endif
+
+# Architecture specific
+# TODO: probably these flags need to be tweaked on some architectures
+#       feel free to update the Makefile for your architecture and send a pull request or issue
+ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))
+	# Use all CPU extensions that are available:
+	CFLAGS += -march=native -mtune=native
+	CXXFLAGS += -march=native -mtune=native
+endif
+ifneq ($(filter ppc64%,$(UNAME_M)),)
+	POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
+	ifneq (,$(findstring POWER9,$(POWER9_M)))
+		CFLAGS += -mcpu=power9
+		CXXFLAGS += -mcpu=power9
+	endif
+	# Require c++23's std::byteswap for big-endian support.
+	ifeq ($(UNAME_M),ppc64)
+		CXXFLAGS += -std=c++23 -DGGML_BIG_ENDIAN
+	endif
+endif
+ifndef LLAMA_NO_ACCELERATE
+	# Mac M1 - include Accelerate framework.
+	# `-framework Accelerate` works on Mac Intel as well, with negliable performance boost (as of the predict time).
+	ifeq ($(UNAME_S),Darwin)
+		CFLAGS += -DGGML_USE_ACCELERATE
+		LDFLAGS += -framework Accelerate
+	endif
+endif
+ifdef LLAMA_OPENBLAS
+	CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
+	LDFLAGS += -lopenblas
+endif
+ifdef LLAMA_GPROF
+	CFLAGS += -pg
+	CXXFLAGS += -pg
+endif
+ifneq ($(filter aarch64%,$(UNAME_M)),)
+	CFLAGS += -mcpu=native
+	CXXFLAGS += -mcpu=native
+endif
+ifneq ($(filter armv6%,$(UNAME_M)),)
+	# Raspberry Pi 1, 2, 3
+	CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
+endif
+ifneq ($(filter armv7%,$(UNAME_M)),)
+	# Raspberry Pi 4
+	CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
+endif
+ifneq ($(filter armv8%,$(UNAME_M)),)
+	# Raspberry Pi 4
+	CFLAGS += -mfp16-format=ieee -mno-unaligned-access
+endif
+
+#
+# Print build information
+#
+
+$(info I llama.cpp build info: )
+$(info I UNAME_S: $(UNAME_S))
+$(info I UNAME_P: $(UNAME_P))
+$(info I UNAME_M: $(UNAME_M))
+$(info I CFLAGS: $(CFLAGS))
+$(info I CXXFLAGS: $(CXXFLAGS))
+$(info I LDFLAGS: $(LDFLAGS))
+$(info I CC: $(CCV))
+$(info I CXX: $(CXXV))
+$(info )
+
+default: main quantize perplexity embedding
+
+#
+# Build library
+#
+
+ggml.o: ggml.c ggml.h
+	$(CC) $(CFLAGS) -c ggml.c -o ggml.o
+
+llama.o: llama.cpp llama.h
+	$(CXX) $(CXXFLAGS) -c llama.cpp -o llama.o
+
+common.o: examples/common.cpp examples/common.h
+	$(CXX) $(CXXFLAGS) -c examples/common.cpp -o common.o
+
+clean:
+	rm -vf *.o main quantize perplexity embedding
+
+main: examples/main/main.cpp ggml.o llama.o common.o
+	$(CXX) $(CXXFLAGS) examples/main/main.cpp ggml.o llama.o common.o -o main $(LDFLAGS)
+	@echo
+	@echo '==== Run ./main -h for help. ===='
+	@echo
+
+quantize: examples/quantize/quantize.cpp ggml.o llama.o
+	$(CXX) $(CXXFLAGS) examples/quantize/quantize.cpp ggml.o llama.o -o quantize $(LDFLAGS)
+
+perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o common.o
+	$(CXX) $(CXXFLAGS) examples/perplexity/perplexity.cpp ggml.o llama.o common.o -o perplexity $(LDFLAGS)
+
+embedding: examples/embedding/embedding.cpp ggml.o llama.o common.o
+	$(CXX) $(CXXFLAGS) examples/embedding/embedding.cpp ggml.o llama.o common.o -o embedding $(LDFLAGS)
+
+#
+# Tests
+#
+
+.PHONY: tests
+tests:
+	bash ./tests/run-tests.sh