llama-rb 0.1.0

checksums.yaml ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA256:
+   metadata.gz: ca78f6c05c53323ba5bd78ccdee815a77c4df10fde7c5497563e48281949cc3e
+   data.tar.gz: 7e225474cc183d2e50f3936d5bee984d394708ff7c48ad040b8b629c6f21fbb9
+ SHA512:
+   metadata.gz: ea82a87539c0511175c6c5afe3c93e6bc5c141ea27bc4af0a4c9c9a8574736de59169bd8d847ca3afd385f27aeb306944f27a4e822233b54f3f47033be92d5ed
+   data.tar.gz: ca92bfd00bea78d88d90c93418a7cf86e9b6a3b436b86f6a7c87cf1906fed59a539085f549a0cdbaa0bf16815c3a221b51ce23313fab8adb6a6310a75fbbe8f5
data/Gemfile ADDED
@@ -0,0 +1,10 @@
+ source "https://rubygems.org"
+
+ gemspec
+
+ gem 'bundler-audit'
+ gem 'gnar-style'
+ gem 'rake'
+ gem 'rspec'
+ gem 'rubocop-rake', require: false
+ gem 'rubocop-rspec', require: false
data/Gemfile.lock ADDED
@@ -0,0 +1,85 @@
+ PATH
+   remote: .
+   specs:
+     llama-rb (0.1.0)
+       rice (~> 4.0.4)
+
+ GEM
+   remote: https://rubygems.org/
+   specs:
+     ast (2.4.2)
+     bundler-audit (0.9.1)
+       bundler (>= 1.2.0, < 3)
+       thor (~> 1.0)
+     diff-lcs (1.5.0)
+     gnar-style (0.13.0)
+       rubocop (>= 1.0.0, < 2.0)
+       rubocop-performance
+       rubocop-rails (~> 2.2.0)
+       thor
+     json (2.6.3)
+     parallel (1.22.1)
+     parser (3.2.2.0)
+       ast (~> 2.4.1)
+     rack (3.0.7)
+     rainbow (3.1.1)
+     rake (13.0.6)
+     regexp_parser (2.7.0)
+     rexml (3.2.5)
+     rice (4.0.4)
+     rspec (3.12.0)
+       rspec-core (~> 3.12.0)
+       rspec-expectations (~> 3.12.0)
+       rspec-mocks (~> 3.12.0)
+     rspec-core (3.12.1)
+       rspec-support (~> 3.12.0)
+     rspec-expectations (3.12.2)
+       diff-lcs (>= 1.2.0, < 2.0)
+       rspec-support (~> 3.12.0)
+     rspec-mocks (3.12.5)
+       diff-lcs (>= 1.2.0, < 2.0)
+       rspec-support (~> 3.12.0)
+     rspec-support (3.12.0)
+     rubocop (1.48.1)
+       json (~> 2.3)
+       parallel (~> 1.10)
+       parser (>= 3.2.0.0)
+       rainbow (>= 2.2.2, < 4.0)
+       regexp_parser (>= 1.8, < 3.0)
+       rexml (>= 3.2.5, < 4.0)
+       rubocop-ast (>= 1.26.0, < 2.0)
+       ruby-progressbar (~> 1.7)
+       unicode-display_width (>= 2.4.0, < 3.0)
+     rubocop-ast (1.28.0)
+       parser (>= 3.2.1.0)
+     rubocop-capybara (2.17.1)
+       rubocop (~> 1.41)
+     rubocop-performance (1.16.0)
+       rubocop (>= 1.7.0, < 2.0)
+       rubocop-ast (>= 0.4.0)
+     rubocop-rails (2.2.1)
+       rack (>= 1.1)
+       rubocop (>= 0.72.0)
+     rubocop-rake (0.6.0)
+       rubocop (~> 1.0)
+     rubocop-rspec (2.19.0)
+       rubocop (~> 1.33)
+       rubocop-capybara (~> 2.17)
+     ruby-progressbar (1.13.0)
+     thor (1.2.1)
+     unicode-display_width (2.4.2)
+
+ PLATFORMS
+   arm64-darwin-21
+
+ DEPENDENCIES
+   bundler-audit
+   gnar-style
+   llama-rb!
+   rake
+   rspec
+   rubocop-rake
+   rubocop-rspec
+
+ BUNDLED WITH
+    2.4.5
data/LICENSE ADDED
@@ -0,0 +1,21 @@
+ The MIT License (MIT)
+
+ Copyright (c) 2023
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,81 @@
+ # Llama-rb
+
+ Ruby wrapper for
+ [llama.cpp](https://github.com/ggerganov/llama.cpp).
+
+ This was hacked together in a weekend, and versions `0.x.x` should be considered unstable.
+
+ ## Installation
+
+ Install the gem and add it to the application's Gemfile by executing:
+
+ ```
+ $ bundle add llama-rb
+ ```
+
+ If bundler is not being used to manage dependencies, install the gem by executing:
+
+ ```
+ $ gem install llama-rb
+ ```
+
+ ## Usage
+
+ ### Models
+
+ Before using this code, you will need to download and process at least one model. See
+ [ggerganov/llama.cpp](https://github.com/ggerganov/llama.cpp#obtaining-and-verifying-the-facebook-llama-original-model-and-stanford-alpaca-model-data).
+
+ ### Example
+
+ ```ruby
+ require 'llama'
+
+ m = Llama::Model.new('models/7B/ggml-model-q4_0.bin')
+ m.predict('hello world')
+ ```
+
+ ### API
+
+ #### Llama::Model.new
+
+ ```ruby
+ def self.new(
+   model,               # path to model file, e.g. "models/7B/ggml-model-q4_0.bin"
+   n_ctx: 512,          # context size
+   n_parts: -1,         # number of model parts (-1 = determine from model dimensions)
+   seed: Time.now.to_i, # RNG seed
+   memory_f16: true,    # use f16 instead of f32 for memory kv
+   use_mlock: false     # use mlock to keep model in memory
+ )
+ ```
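+
+ For example, to load a model with a larger context window and a fixed seed (the path and values below are illustrative, not defaults):
+
+ ```ruby
+ require 'llama'
+
+ # n_ctx and seed are the keyword arguments documented above;
+ # fixing the seed makes generations reproducible across runs.
+ m = Llama::Model.new(
+   'models/7B/ggml-model-q4_0.bin',
+   n_ctx: 1024,
+   seed: 42
+ )
+ ```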
+
+ #### Llama::Model#predict
+
+ ```ruby
+ def predict(
+   prompt,        # string used as prompt
+   n_predict: 128 # number of tokens to predict
+ )
+ ```
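+
+ For example, to request a longer completion than the default 128 tokens (the prompt below is illustrative):
+
+ ```ruby
+ # generate up to 256 tokens continuing the given prompt
+ m.predict('Once upon a time', n_predict: 256)
+ ```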
+
+ ## Development
+
+ ```
+ git clone --recurse-submodules https://github.com/zfletch/llama-rb
+ cd llama-rb
+ ./bin/setup
+ ```
+
+ After checking out the repo, run `bin/setup` to install dependencies.
+ Then, run `rake spec` to run the tests.
+ You can also run `bin/console` for an interactive prompt that will allow you to experiment.
+
+ To install this gem onto your local machine, run `bundle exec rake install`.
+ To release a new version, update the version number in `version.rb`, and then run
+ `bundle exec rake release`, which will create a git tag for the version, push git
+ commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
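+
+ Put together, a typical development session uses the commands described above:
+
+ ```
+ bin/setup                  # install dependencies
+ rake spec                  # run the tests
+ bin/console                # interactive prompt for experimentation
+ bundle exec rake install   # install the gem onto your local machine
+ bundle exec rake release   # tag the version and publish to rubygems.org
+ ```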
+
+ ## Contributing
+
+ Bug reports and pull requests are welcome on GitHub at https://github.com/zfletch/llama-rb.
data/Rakefile ADDED
@@ -0,0 +1,10 @@
+ require 'bundler/gem_tasks'
+ require 'rspec/core/rake_task'
+
+ RSpec::Core::RakeTask.new(:spec)
+
+ require 'rubocop/rake_task'
+
+ RuboCop::RakeTask.new
+
+ task default: [:spec, :rubocop]
@@ -0,0 +1,311 @@
+ #include "common.h"
+
+ #include "ggml.h"
+
+ #include <cassert>
+ #include <cstring>
+ #include <fstream>
+ #include <string>
+ #include <iterator>
+ #include <algorithm>
+
+ #if defined(_MSC_VER) || defined(__MINGW32__)
+ #include <malloc.h> // using malloc.h with MSC/MINGW
+ #elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
+ #include <alloca.h>
+ #endif
+
+ #if defined (_WIN32)
+ #pragma comment(lib,"kernel32.lib")
+ extern "C" __declspec(dllimport) void* __stdcall GetStdHandle(unsigned long nStdHandle);
+ extern "C" __declspec(dllimport) int __stdcall GetConsoleMode(void* hConsoleHandle, unsigned long* lpMode);
+ extern "C" __declspec(dllimport) int __stdcall SetConsoleMode(void* hConsoleHandle, unsigned long dwMode);
+ extern "C" __declspec(dllimport) int __stdcall SetConsoleCP(unsigned int wCodePageID);
+ extern "C" __declspec(dllimport) int __stdcall SetConsoleOutputCP(unsigned int wCodePageID);
+ #endif
+
+ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
+     // determine sensible default number of threads.
+     // std::thread::hardware_concurrency may not be equal to the number of cores, or may return 0.
+ #ifdef __linux__
+     std::ifstream cpuinfo("/proc/cpuinfo");
+     params.n_threads = std::count(std::istream_iterator<std::string>(cpuinfo),
+                                   std::istream_iterator<std::string>(),
+                                   std::string("processor"));
+ #endif
+     if (params.n_threads == 0) {
+         params.n_threads = std::max(1, (int32_t) std::thread::hardware_concurrency());
+     }
+
+     bool invalid_param = false;
+     std::string arg;
+     for (int i = 1; i < argc; i++) {
+         arg = argv[i];
+
+         if (arg == "-s" || arg == "--seed") {
+             if (++i >= argc) {
+                 invalid_param = true;
+                 break;
+             }
+             params.seed = std::stoi(argv[i]);
+         } else if (arg == "-t" || arg == "--threads") {
+             if (++i >= argc) {
+                 invalid_param = true;
+                 break;
+             }
+             params.n_threads = std::stoi(argv[i]);
+         } else if (arg == "-p" || arg == "--prompt") {
+             if (++i >= argc) {
+                 invalid_param = true;
+                 break;
+             }
+             params.prompt = argv[i];
+         } else if (arg == "-f" || arg == "--file") {
+             if (++i >= argc) {
+                 invalid_param = true;
+                 break;
+             }
+             std::ifstream file(argv[i]);
+             std::copy(std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>(), back_inserter(params.prompt));
+             if (params.prompt.back() == '\n') {
+                 params.prompt.pop_back();
+             }
+         } else if (arg == "-n" || arg == "--n_predict") {
+             if (++i >= argc) {
+                 invalid_param = true;
+                 break;
+             }
+             params.n_predict = std::stoi(argv[i]);
+         } else if (arg == "--top_k") {
+             if (++i >= argc) {
+                 invalid_param = true;
+                 break;
+             }
+             params.top_k = std::stoi(argv[i]);
+         } else if (arg == "-c" || arg == "--ctx_size") {
+             if (++i >= argc) {
+                 invalid_param = true;
+                 break;
+             }
+             params.n_ctx = std::stoi(argv[i]);
+         } else if (arg == "--memory_f32") {
+             params.memory_f16 = false;
+         } else if (arg == "--top_p") {
+             if (++i >= argc) {
+                 invalid_param = true;
+                 break;
+             }
+             params.top_p = std::stof(argv[i]);
+         } else if (arg == "--temp") {
+             if (++i >= argc) {
+                 invalid_param = true;
+                 break;
+             }
+             params.temp = std::stof(argv[i]);
+         } else if (arg == "--repeat_last_n") {
+             if (++i >= argc) {
+                 invalid_param = true;
+                 break;
+             }
+             params.repeat_last_n = std::stoi(argv[i]);
+         } else if (arg == "--repeat_penalty") {
+             if (++i >= argc) {
+                 invalid_param = true;
+                 break;
+             }
+             params.repeat_penalty = std::stof(argv[i]);
+         } else if (arg == "-b" || arg == "--batch_size") {
+             if (++i >= argc) {
+                 invalid_param = true;
+                 break;
+             }
+             params.n_batch = std::stoi(argv[i]);
+             params.n_batch = std::min(512, params.n_batch);
+         } else if (arg == "--keep") {
+             if (++i >= argc) {
+                 invalid_param = true;
+                 break;
+             }
+             params.n_keep = std::stoi(argv[i]);
+         } else if (arg == "-m" || arg == "--model") {
+             if (++i >= argc) {
+                 invalid_param = true;
+                 break;
+             }
+             params.model = argv[i];
+         } else if (arg == "-i" || arg == "--interactive") {
+             params.interactive = true;
+         } else if (arg == "--embedding") {
+             params.embedding = true;
+         } else if (arg == "--interactive-start") {
+             params.interactive = true;
+         } else if (arg == "--interactive-first") {
+             params.interactive_start = true;
+         } else if (arg == "-ins" || arg == "--instruct") {
+             params.instruct = true;
+         } else if (arg == "--color") {
+             params.use_color = true;
+         } else if (arg == "--mlock") {
+             params.use_mlock = true;
+         } else if (arg == "--mtest") {
+             params.mem_test = true;
+         } else if (arg == "--verbose-prompt") {
+             params.verbose_prompt = true;
+         } else if (arg == "-r" || arg == "--reverse-prompt") {
+             if (++i >= argc) {
+                 invalid_param = true;
+                 break;
+             }
+             params.antiprompt.push_back(argv[i]);
+         } else if (arg == "--perplexity") {
+             params.perplexity = true;
+         } else if (arg == "--ignore-eos") {
+             params.ignore_eos = true;
+         } else if (arg == "--n_parts") {
+             if (++i >= argc) {
+                 invalid_param = true;
+                 break;
+             }
+             params.n_parts = std::stoi(argv[i]);
+         } else if (arg == "-h" || arg == "--help") {
+             gpt_print_usage(argc, argv, params);
+             exit(0);
+         } else if (arg == "--random-prompt") {
+             params.random_prompt = true;
+         } else if (arg == "--in-prefix") {
+             if (++i >= argc) {
+                 invalid_param = true;
+                 break;
+             }
+             params.input_prefix = argv[i];
+         } else {
+             fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
+             gpt_print_usage(argc, argv, params);
+             exit(1);
+         }
+     }
+     if (invalid_param) {
+         fprintf(stderr, "error: invalid parameter for argument: %s\n", arg.c_str());
+         gpt_print_usage(argc, argv, params);
+         exit(1);
+     }
+
+     return true;
+ }
+
+ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
+     fprintf(stderr, "usage: %s [options]\n", argv[0]);
+     fprintf(stderr, "\n");
+     fprintf(stderr, "options:\n");
+     fprintf(stderr, "  -h, --help            show this help message and exit\n");
+     fprintf(stderr, "  -i, --interactive     run in interactive mode\n");
+     fprintf(stderr, "  --interactive-first   run in interactive mode and wait for input right away\n");
+     fprintf(stderr, "  -ins, --instruct      run in instruction mode (use with Alpaca models)\n");
+     fprintf(stderr, "  -r PROMPT, --reverse-prompt PROMPT\n");
+     fprintf(stderr, "                        run in interactive mode and poll user input upon seeing PROMPT (can be\n");
+     fprintf(stderr, "                        specified more than once for multiple prompts).\n");
+     fprintf(stderr, "  --color               colorise output to distinguish prompt and user input from generations\n");
+     fprintf(stderr, "  -s SEED, --seed SEED  RNG seed (default: -1, use random seed for <= 0)\n");
+     fprintf(stderr, "  -t N, --threads N     number of threads to use during computation (default: %d)\n", params.n_threads);
+     fprintf(stderr, "  -p PROMPT, --prompt PROMPT\n");
+     fprintf(stderr, "                        prompt to start generation with (default: empty)\n");
+     fprintf(stderr, "  --random-prompt       start with a randomized prompt.\n");
+     fprintf(stderr, "  --in-prefix STRING    string to prefix user inputs with (default: empty)\n");
+     fprintf(stderr, "  -f FNAME, --file FNAME\n");
+     fprintf(stderr, "                        prompt file to start generation.\n");
+     fprintf(stderr, "  -n N, --n_predict N   number of tokens to predict (default: %d, -1 = infinity)\n", params.n_predict);
+     fprintf(stderr, "  --top_k N             top-k sampling (default: %d)\n", params.top_k);
+     fprintf(stderr, "  --top_p N             top-p sampling (default: %.1f)\n", (double)params.top_p);
+     fprintf(stderr, "  --repeat_last_n N     last n tokens to consider for penalize (default: %d)\n", params.repeat_last_n);
+     fprintf(stderr, "  --repeat_penalty N    penalize repeat sequence of tokens (default: %.1f)\n", (double)params.repeat_penalty);
+     fprintf(stderr, "  -c N, --ctx_size N    size of the prompt context (default: %d)\n", params.n_ctx);
+     fprintf(stderr, "  --ignore-eos          ignore end of stream token and continue generating\n");
+     fprintf(stderr, "  --memory_f32          use f32 instead of f16 for memory key+value\n");
+     fprintf(stderr, "  --temp N              temperature (default: %.1f)\n", (double)params.temp);
+     fprintf(stderr, "  --n_parts N           number of model parts (default: -1 = determine from dimensions)\n");
+     fprintf(stderr, "  -b N, --batch_size N  batch size for prompt processing (default: %d)\n", params.n_batch);
+     fprintf(stderr, "  --perplexity          compute perplexity over the prompt\n");
+     fprintf(stderr, "  --keep                number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep);
+     if (ggml_mlock_supported()) {
+         fprintf(stderr, "  --mlock               force system to keep model in RAM rather than swapping or compressing\n");
+     }
+     fprintf(stderr, "  --mtest               compute maximum memory usage\n");
+     fprintf(stderr, "  --verbose-prompt      print prompt before generation\n");
+     fprintf(stderr, "  -m FNAME, --model FNAME\n");
+     fprintf(stderr, "                        model path (default: %s)\n", params.model.c_str());
+     fprintf(stderr, "\n");
+ }
+
+ std::string gpt_random_prompt(std::mt19937 & rng) {
+     const int r = rng() % 10;
+     switch (r) {
+         case 0: return "So";
+         case 1: return "Once upon a time";
+         case 2: return "When";
+         case 3: return "The";
+         case 4: return "After";
+         case 5: return "If";
+         case 6: return "import";
+         case 7: return "He";
+         case 8: return "She";
+         case 9: return "They";
+         default: return "To";
+     }
+
+     return "The";
+ }
+
258
+ // TODO: not great allocating this every time
259
+ std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos) {
260
+ // initialize to prompt numer of chars, since n_tokens <= n_prompt_chars
261
+ std::vector<llama_token> res(text.size() + (int)add_bos);
262
+ int n = llama_tokenize(ctx, text.c_str(), res.data(), res.size(), add_bos);
263
+ assert(n >= 0);
264
+ res.resize(n);
265
+
266
+ return res;
267
+ }
268
+
269
+ /* Keep track of current color of output, and emit ANSI code if it changes. */
270
+ void set_console_color(console_state & con_st, console_color_t color) {
271
+ if (con_st.use_color && con_st.color != color) {
272
+ switch(color) {
273
+ case CONSOLE_COLOR_DEFAULT:
274
+ printf(ANSI_COLOR_RESET);
275
+ break;
276
+ case CONSOLE_COLOR_PROMPT:
277
+ printf(ANSI_COLOR_YELLOW);
278
+ break;
279
+ case CONSOLE_COLOR_USER_INPUT:
280
+ printf(ANSI_BOLD ANSI_COLOR_GREEN);
281
+ break;
282
+ }
283
+ con_st.color = color;
284
+ }
285
+ }
286
+
287
+ #if defined (_WIN32)
288
+ void win32_console_init(bool enable_color) {
289
+ unsigned long dwMode = 0;
290
+ void* hConOut = GetStdHandle((unsigned long)-11); // STD_OUTPUT_HANDLE (-11)
291
+ if (!hConOut || hConOut == (void*)-1 || !GetConsoleMode(hConOut, &dwMode)) {
292
+ hConOut = GetStdHandle((unsigned long)-12); // STD_ERROR_HANDLE (-12)
293
+ if (hConOut && (hConOut == (void*)-1 || !GetConsoleMode(hConOut, &dwMode))) {
294
+ hConOut = 0;
295
+ }
296
+ }
297
+ if (hConOut) {
298
+ // Enable ANSI colors on Windows 10+
299
+ if (enable_color && !(dwMode & 0x4)) {
300
+ SetConsoleMode(hConOut, dwMode | 0x4); // ENABLE_VIRTUAL_TERMINAL_PROCESSING (0x4)
301
+ }
302
+ // Set console output codepage to UTF8
303
+ SetConsoleOutputCP(65001); // CP_UTF8
304
+ }
305
+ void* hConIn = GetStdHandle((unsigned long)-10); // STD_INPUT_HANDLE (-10)
306
+ if (hConIn && hConIn != (void*)-1 && GetConsoleMode(hConIn, &dwMode)) {
307
+ // Set console input codepage to UTF8
308
+ SetConsoleCP(65001); // CP_UTF8
309
+ }
310
+ }
311
+ #endif
@@ -0,0 +1,95 @@
+ // Various helper functions and utilities
+
+ #pragma once
+
+ #include "llama.h"
+
+ #include <string>
+ #include <vector>
+ #include <random>
+ #include <thread>
+
+ //
+ // CLI argument parsing
+ //
+
+ struct gpt_params {
+     int32_t seed          = -1;  // RNG seed
+     int32_t n_threads     = std::min(4, (int32_t) std::thread::hardware_concurrency());
+     int32_t n_predict     = 128; // new tokens to predict
+     int32_t repeat_last_n = 64;  // last n tokens to penalize
+     int32_t n_parts       = -1;  // number of model parts (-1 = determine from model dimensions)
+     int32_t n_ctx         = 512; // context size
+     int32_t n_batch       = 8;   // batch size for prompt processing
+     int32_t n_keep        = 0;   // number of tokens to keep from initial prompt
+
+     // sampling parameters
+     int32_t top_k = 40;
+     float   top_p = 0.95f;
+     float   temp  = 0.80f;
+     float   repeat_penalty = 1.10f;
+
+     std::string model = "models/lamma-7B/ggml-model.bin"; // model path
+     std::string prompt = "";
+     std::string input_prefix = ""; // string to prefix user inputs with
+
+     std::vector<std::string> antiprompt; // string upon seeing which more user input is prompted
+
+     bool memory_f16 = true; // use f16 instead of f32 for memory kv
+     bool random_prompt = false; // do not randomize prompt if none provided
+     bool use_color = false; // use color to distinguish generations and inputs
+     bool interactive = false; // interactive mode
+
+     bool embedding = false; // get only sentence embedding
+     bool interactive_start = false; // wait for user input immediately
+
+     bool instruct = false; // instruction mode (used for Alpaca models)
+     bool ignore_eos = false; // do not stop generating after eos
+     bool perplexity = false; // compute perplexity over the prompt
+     bool use_mlock = false; // use mlock to keep model in memory
+     bool mem_test = false; // compute maximum memory usage
+     bool verbose_prompt = false; // print prompt tokens before generation
+ };
+
+ bool gpt_params_parse(int argc, char ** argv, gpt_params & params);
+
+ void gpt_print_usage(int argc, char ** argv, const gpt_params & params);
+
+ std::string gpt_random_prompt(std::mt19937 & rng);
+
+ //
+ // Vocab utils
+ //
+
+ std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos);
+
+ //
+ // Console utils
+ //
+
+ #define ANSI_COLOR_RED     "\x1b[31m"
+ #define ANSI_COLOR_GREEN   "\x1b[32m"
+ #define ANSI_COLOR_YELLOW  "\x1b[33m"
+ #define ANSI_COLOR_BLUE    "\x1b[34m"
+ #define ANSI_COLOR_MAGENTA "\x1b[35m"
+ #define ANSI_COLOR_CYAN    "\x1b[36m"
+ #define ANSI_COLOR_RESET   "\x1b[0m"
+ #define ANSI_BOLD          "\x1b[1m"
+
+ enum console_color_t {
+     CONSOLE_COLOR_DEFAULT=0,
+     CONSOLE_COLOR_PROMPT,
+     CONSOLE_COLOR_USER_INPUT
+ };
+
+ struct console_state {
+     bool use_color = false;
+     console_color_t color = CONSOLE_COLOR_DEFAULT;
+ };
+
+ void set_console_color(console_state & con_st, console_color_t color);
+
+ #if defined (_WIN32)
+ void win32_console_init(bool enable_color);
+ #endif
@@ -0,0 +1,12 @@
+ require 'mkmf-rice'
+
+ # Compile llama.cpp
+ # root = File.expand_path(File.join(File.dirname(__FILE__), "..", ".."))
+ # llama_cpp = File.join(root, 'llama.cpp')
+ #
+ # Dir.chdir(llama_cpp) do
+ #   system("make", exception: true)
+ # end
+
+ # Create Makefile for Ruby bindings
+ create_makefile 'llama/model'