llama-rb 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +1 -1
- data/Gemfile.lock +1 -3
- data/bin/console +7 -0
- data/ext/Makefile +4 -0
- data/ext/extconf.rb +10 -0
- data/lib/llama/model.rb +36 -64
- data/lib/llama/version.rb +1 -1
- data/lib/llama.rb +0 -1
- data/llama-rb.gemspec +25 -25
- data/llama.cpp/LICENSE +21 -0
- data/llama.cpp/Makefile +175 -0
- data/llama.cpp/README.md +389 -0
- data/{ext/llama → llama.cpp/examples}/common.cpp +10 -3
- data/llama.cpp/examples/main/main.cpp +460 -0
- data/{ext/llama → llama.cpp}/ggml.c +587 -485
- data/{ext/llama → llama.cpp}/ggml.h +36 -26
- data/{ext/llama → llama.cpp}/llama.cpp +85 -35
- data/{ext/llama → llama.cpp}/llama.h +17 -0
- metadata +18 -27
- data/ext/llama/extconf.rb +0 -12
- data/ext/llama/model.cpp +0 -192
- /data/{ext/llama → llama.cpp/examples}/common.h +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 1371348a7ba9c4fa75ada41ec8afc6461e1d56dae2c3e3dede175d189ecdd7ea
+  data.tar.gz: b45a9ed3c28a228a2405ec8874f4cf8239dfcb4cb3132e7a44be806b5c6a2a78
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 88dd6f7a6f971f60753625dce11b469bbf46f606b4be4c8d308636d1f696666cacd9b174bda65bc5e42d503db413c9f1281c9a7129d838f1dfab3088717f603f
+  data.tar.gz: 449673e8950cc869ad899500b85a6108d2a02b7915ca340733bda0f18fa49691df7e839a6efece440d76a0583d037c90a6226f505eacc08ba24a9ae510b840bc
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,8 +1,7 @@
 PATH
   remote: .
   specs:
-    llama-rb (0.1.0)
-      rice (~> 4.0.4)
+    llama-rb (0.2.0)
 
 GEM
   remote: https://rubygems.org/
@@ -26,7 +25,6 @@ GEM
     rake (13.0.6)
     regexp_parser (2.7.0)
     rexml (3.2.5)
-    rice (4.0.4)
     rspec (3.12.0)
       rspec-core (~> 3.12.0)
       rspec-expectations (~> 3.12.0)
data/bin/console
ADDED
data/ext/Makefile
ADDED
data/ext/extconf.rb
ADDED
@@ -0,0 +1,10 @@
+require 'fileutils'
+
+root = File.dirname(__FILE__)
+llama_root = File.join(root, '..', 'llama.cpp')
+
+main = File.join(root, '..', 'bin', 'llama')
+llama_main = File.join(llama_root, 'main')
+
+Dir.chdir(llama_root) { system('make main', exception: true) }
+FileUtils.cp(llama_main, main)
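Note on the new build step: it no longer uses mkmf. Loading ext/extconf.rb compiles llama.cpp's `main` target and copies the binary to bin/llama as a side effect, and the four-line ext/Makefile added above is presumably a no-op stub so that RubyGems' usual make invocation after extconf still succeeds. The `exception: true` option to Kernel#system (Ruby 2.6+) turns a failed compile into a raised error, aborting gem installation instead of silently producing a gem without its binary. A minimal sketch of that behavior:

  # system normally returns false on a non-zero exit status (or nil if the
  # command cannot be spawned); with exception: true it raises instead,
  # so a broken llama.cpp build cannot go unnoticed.
  system('exit 1')                   # => false
  system('exit 1', exception: true)  # raises RuntimeError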
data/lib/llama/model.rb
CHANGED
@@ -1,86 +1,58 @@
-require '
+require 'open3'
+require 'shellwords'
 
 module Llama
   class Model
-
-
+    class ModelError < StandardError
+    end
 
-
-
-
-
-
-      seed: Time.now.to_i, # RNG seed
-      memory_f16: true, # use f16 instead of f32 for memory kv
-      use_mlock: false # use mlock to keep model in memory
+    def initialize(
+      model,
+      seed: Time.now.to_i,
+      n_predict: 128,
+      binary: default_binary
     )
-
-
-
-
-
-      @model = model
-      @n_ctx = n_ctx
-      @n_parts = n_parts
-      @seed = seed
-      @memory_f16 = memory_f16
-      @use_mlock = use_mlock
-
-      capture_stderr do
-        initialize_cpp(
-          model,
-          n_ctx,
-          n_parts,
-          seed,
-          memory_f16,
-          use_mlock,
-        )
-      end
-    end
-
-    instance
+      @model = model
+      @seed = seed
+      @n_predict = n_predict
+      @binary = binary
     end
-    # rubocop:enable Metrics/MethodLength
 
-    def predict(
-
-      n_predict: 128 # number of tokens to predict
-    )
-      text = ''
+    def predict(prompt)
+      stdout, @stderr, @status = Open3.capture3(command(prompt))
 
-
+      raise ModelError, "Error #{status.to_i}" unless status.success?
 
-
+      # remove the space that is added as a tokenizer hack in examples/main/main.cpp
+      stdout[0] = ''
+      stdout
     end
 
-    attr_reader :model, :
+    attr_reader :model, :seed, :n_predict, :binary
 
     private
 
-
-      previous = $stderr.dup
-      tmp = Tempfile.open('llama-rb-stderr')
-
-      begin
-        $stderr.reopen(tmp)
+    attr_reader :stderr, :status
 
-
-
-        tmp.rewind
-        @stderr = tmp.read
-      ensure
-        tmp.close(true)
-        $stderr.reopen(previous)
-      end
+    def default_binary
+      File.join(File.dirname(__FILE__), '..', '..', 'bin', 'llama')
     end
 
-    def
-
+    def command(prompt)
+      escape_command(binary,
+                     model: model,
+                     prompt: prompt,
+                     seed: seed,
+                     n_predict: n_predict)
+    end
 
-
-
+    def escape_command(command, **flags)
+      flags_string = flags.map do |key, value|
+        "--#{Shellwords.escape(key)} #{Shellwords.escape(value)}"
+      end.join(' ')
+      command_string = Shellwords.escape(command)
 
-
+      "#{command_string} #{flags_string}"
     end
   end
 end
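Taken together, Model#predict now shells out to the vendored llama.cpp binary via Open3.capture3 instead of calling into the removed Rice extension. A hypothetical usage sketch (the model path is an assumption; any ggml-format model file would do):

  require 'llama'

  # 'models/7B/ggml-model-q4_0.bin' is a placeholder path to a downloaded model
  model = Llama::Model.new(
    'models/7B/ggml-model-q4_0.bin',
    seed: 42,
    n_predict: 64
  )
  puts model.predict('The first person to walk on the Moon was')

Because the binary path, every flag name, and every flag value pass through Shellwords.escape, an untrusted prompt cannot inject shell syntax into the constructed command line.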
data/lib/llama/version.rb
CHANGED
data/lib/llama.rb
CHANGED
data/llama-rb.gemspec
CHANGED
@@ -5,7 +5,7 @@ Gem::Specification.new do |spec|
   spec.version = Llama::VERSION
   spec.licenses = ['MIT']
   spec.authors = ['zfletch']
-  spec.email = ['
+  spec.email = ['zf.rubygems@gmail.com']
 
   spec.summary = 'Ruby interface for Llama'
   spec.description = 'ggerganov/llama.cpp with Ruby hooks'
@@ -16,35 +16,35 @@ Gem::Specification.new do |spec|
   spec.metadata['source_code_uri'] = spec.homepage
   spec.metadata['changelog_uri'] = "#{spec.homepage}/releases"
 
-  # Specify which files should be added to the gem when it is released.
-  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
   spec.files = [
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    'Gemfile',
+    'Gemfile.lock',
+    'LICENSE',
+    'README.md',
+    'Rakefile',
+    'bin/console',
+    'ext/extconf.rb',
+    'ext/Makefile',
+    'lib/llama.rb',
+    'lib/llama/model.rb',
+    'lib/llama/version.rb',
+    'llama-rb.gemspec',
+    'llama.cpp/LICENSE',
+    'llama.cpp/Makefile',
+    'llama.cpp/README.md',
+    'llama.cpp/examples/common.cpp',
+    'llama.cpp/examples/common.h',
+    'llama.cpp/examples/main/main.cpp',
+    'llama.cpp/ggml.c',
+    'llama.cpp/ggml.h',
+    'llama.cpp/llama.cpp',
+    'llama.cpp/llama.h',
+    'models/.gitkeep',
   ]
   spec.bindir = 'exe'
   spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
   spec.require_paths = ['lib']
 
-  spec.
-
-  spec.extensions = %w[ext/llama/extconf.rb]
+  spec.extensions = %w[ext/extconf.rb]
   spec.metadata['rubygems_mfa_required'] = 'true'
 end
data/llama.cpp/LICENSE
ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 Georgi Gerganov
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
data/llama.cpp/Makefile
ADDED
@@ -0,0 +1,175 @@
+ifndef UNAME_S
+UNAME_S := $(shell uname -s)
+endif
+
+ifndef UNAME_P
+UNAME_P := $(shell uname -p)
+endif
+
+ifndef UNAME_M
+UNAME_M := $(shell uname -m)
+endif
+
+CCV := $(shell $(CC) --version | head -n 1)
+CXXV := $(shell $(CXX) --version | head -n 1)
+
+# Mac OS + Arm can report x86_64
+# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
+ifeq ($(UNAME_S),Darwin)
+	ifneq ($(UNAME_P),arm)
+		SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
+		ifeq ($(SYSCTL_M),1)
+			# UNAME_P := arm
+			# UNAME_M := arm64
+			warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
+		endif
+	endif
+endif
+
+#
+# Compile flags
+#
+
+# keep standard at C11 and C++11
+CFLAGS   = -I.              -O3 -DNDEBUG -std=c11   -fPIC
+CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC
+LDFLAGS  =
+
+# warnings
+CFLAGS   += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith -Wno-unused-function
+CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
+
+# OS specific
+# TODO: support Windows
+ifeq ($(UNAME_S),Linux)
+	CFLAGS   += -pthread
+	CXXFLAGS += -pthread
+endif
+ifeq ($(UNAME_S),Darwin)
+	CFLAGS   += -pthread
+	CXXFLAGS += -pthread
+endif
+ifeq ($(UNAME_S),FreeBSD)
+	CFLAGS   += -pthread
+	CXXFLAGS += -pthread
+endif
+ifeq ($(UNAME_S),NetBSD)
+	CFLAGS   += -pthread
+	CXXFLAGS += -pthread
+endif
+ifeq ($(UNAME_S),OpenBSD)
+	CFLAGS   += -pthread
+	CXXFLAGS += -pthread
+endif
+ifeq ($(UNAME_S),Haiku)
+	CFLAGS   += -pthread
+	CXXFLAGS += -pthread
+endif
+
+# Architecture specific
+# TODO: probably these flags need to be tweaked on some architectures
+#       feel free to update the Makefile for your architecture and send a pull request or issue
+ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))
+	# Use all CPU extensions that are available:
+	CFLAGS   += -march=native -mtune=native
+	CXXFLAGS += -march=native -mtune=native
+endif
+ifneq ($(filter ppc64%,$(UNAME_M)),)
+	POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
+	ifneq (,$(findstring POWER9,$(POWER9_M)))
+		CFLAGS   += -mcpu=power9
+		CXXFLAGS += -mcpu=power9
+	endif
+	# Require c++23's std::byteswap for big-endian support.
+	ifeq ($(UNAME_M),ppc64)
+		CXXFLAGS += -std=c++23 -DGGML_BIG_ENDIAN
+	endif
+endif
+ifndef LLAMA_NO_ACCELERATE
+	# Mac M1 - include Accelerate framework.
+	# `-framework Accelerate` works on Mac Intel as well, with negliable performance boost (as of the predict time).
+	ifeq ($(UNAME_S),Darwin)
+		CFLAGS  += -DGGML_USE_ACCELERATE
+		LDFLAGS += -framework Accelerate
+	endif
+endif
+ifdef LLAMA_OPENBLAS
+	CFLAGS  += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
+	LDFLAGS += -lopenblas
+endif
+ifdef LLAMA_GPROF
+	CFLAGS   += -pg
+	CXXFLAGS += -pg
+endif
+ifneq ($(filter aarch64%,$(UNAME_M)),)
+	CFLAGS   += -mcpu=native
+	CXXFLAGS += -mcpu=native
+endif
+ifneq ($(filter armv6%,$(UNAME_M)),)
+	# Raspberry Pi 1, 2, 3
+	CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
+endif
+ifneq ($(filter armv7%,$(UNAME_M)),)
+	# Raspberry Pi 4
+	CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
+endif
+ifneq ($(filter armv8%,$(UNAME_M)),)
+	# Raspberry Pi 4
+	CFLAGS += -mfp16-format=ieee -mno-unaligned-access
+endif
+
+#
+# Print build information
+#
+
+$(info I llama.cpp build info: )
+$(info I UNAME_S:  $(UNAME_S))
+$(info I UNAME_P:  $(UNAME_P))
+$(info I UNAME_M:  $(UNAME_M))
+$(info I CFLAGS:   $(CFLAGS))
+$(info I CXXFLAGS: $(CXXFLAGS))
+$(info I LDFLAGS:  $(LDFLAGS))
+$(info I CC:       $(CCV))
+$(info I CXX:      $(CXXV))
+$(info )
+
+default: main quantize perplexity embedding
+
+#
+# Build library
+#
+
+ggml.o: ggml.c ggml.h
+	$(CC)  $(CFLAGS)   -c ggml.c -o ggml.o
+
+llama.o: llama.cpp llama.h
+	$(CXX) $(CXXFLAGS) -c llama.cpp -o llama.o
+
+common.o: examples/common.cpp examples/common.h
+	$(CXX) $(CXXFLAGS) -c examples/common.cpp -o common.o
+
+clean:
+	rm -vf *.o main quantize perplexity embedding
+
+main: examples/main/main.cpp ggml.o llama.o common.o
+	$(CXX) $(CXXFLAGS) examples/main/main.cpp ggml.o llama.o common.o -o main $(LDFLAGS)
+	@echo
+	@echo '====  Run ./main -h for help.  ===='
+	@echo
+
+quantize: examples/quantize/quantize.cpp ggml.o llama.o
+	$(CXX) $(CXXFLAGS) examples/quantize/quantize.cpp ggml.o llama.o -o quantize $(LDFLAGS)
+
+perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o common.o
+	$(CXX) $(CXXFLAGS) examples/perplexity/perplexity.cpp ggml.o llama.o common.o -o perplexity $(LDFLAGS)
+
+embedding: examples/embedding/embedding.cpp ggml.o llama.o common.o
+	$(CXX) $(CXXFLAGS) examples/embedding/embedding.cpp ggml.o llama.o common.o -o embedding $(LDFLAGS)
+
+#
+# Tests
+#
+
+.PHONY: tests
+tests:
+	bash ./tests/run-tests.sh