llama-rb 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA256:
+   metadata.gz: ca78f6c05c53323ba5bd78ccdee815a77c4df10fde7c5497563e48281949cc3e
+   data.tar.gz: 7e225474cc183d2e50f3936d5bee984d394708ff7c48ad040b8b629c6f21fbb9
+ SHA512:
+   metadata.gz: ea82a87539c0511175c6c5afe3c93e6bc5c141ea27bc4af0a4c9c9a8574736de59169bd8d847ca3afd385f27aeb306944f27a4e822233b54f3f47033be92d5ed
+   data.tar.gz: ca92bfd00bea78d88d90c93418a7cf86e9b6a3b436b86f6a7c87cf1906fed59a539085f549a0cdbaa0bf16815c3a221b51ce23313fab8adb6a6310a75fbbe8f5
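For reference, the digests recorded above can be checked by hand. A minimal sketch in Ruby, assuming the gem has been fetched (e.g. `gem fetch llama-rb --version 0.1.0`) and its archive members extracted with `tar -xf llama-rb-0.1.0.gem`; the filenames used here are assumptions, not part of the package:

```ruby
require 'digest'

# Compare freshly computed SHA256 digests of the extracted gem members
# against the values recorded in checksums.yaml above.
%w[metadata.gz data.tar.gz].each do |member|
  puts "#{member}: #{Digest::SHA256.file(member).hexdigest}"
end
```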
data/Gemfile ADDED
@@ -0,0 +1,10 @@
+ source "https://rubygems.org"
+
+ gemspec
+
+ gem 'bundler-audit'
+ gem 'gnar-style'
+ gem 'rake'
+ gem 'rspec'
+ gem 'rubocop-rake', require: false
+ gem 'rubocop-rspec', require: false
data/Gemfile.lock ADDED
@@ -0,0 +1,85 @@
+ PATH
+   remote: .
+   specs:
+     llama-rb (0.1.0)
+       rice (~> 4.0.4)
+
+ GEM
+   remote: https://rubygems.org/
+   specs:
+     ast (2.4.2)
+     bundler-audit (0.9.1)
+       bundler (>= 1.2.0, < 3)
+       thor (~> 1.0)
+     diff-lcs (1.5.0)
+     gnar-style (0.13.0)
+       rubocop (>= 1.0.0, < 2.0)
+       rubocop-performance
+       rubocop-rails (~> 2.2.0)
+       thor
+     json (2.6.3)
+     parallel (1.22.1)
+     parser (3.2.2.0)
+       ast (~> 2.4.1)
+     rack (3.0.7)
+     rainbow (3.1.1)
+     rake (13.0.6)
+     regexp_parser (2.7.0)
+     rexml (3.2.5)
+     rice (4.0.4)
+     rspec (3.12.0)
+       rspec-core (~> 3.12.0)
+       rspec-expectations (~> 3.12.0)
+       rspec-mocks (~> 3.12.0)
+     rspec-core (3.12.1)
+       rspec-support (~> 3.12.0)
+     rspec-expectations (3.12.2)
+       diff-lcs (>= 1.2.0, < 2.0)
+       rspec-support (~> 3.12.0)
+     rspec-mocks (3.12.5)
+       diff-lcs (>= 1.2.0, < 2.0)
+       rspec-support (~> 3.12.0)
+     rspec-support (3.12.0)
+     rubocop (1.48.1)
+       json (~> 2.3)
+       parallel (~> 1.10)
+       parser (>= 3.2.0.0)
+       rainbow (>= 2.2.2, < 4.0)
+       regexp_parser (>= 1.8, < 3.0)
+       rexml (>= 3.2.5, < 4.0)
+       rubocop-ast (>= 1.26.0, < 2.0)
+       ruby-progressbar (~> 1.7)
+       unicode-display_width (>= 2.4.0, < 3.0)
+     rubocop-ast (1.28.0)
+       parser (>= 3.2.1.0)
+     rubocop-capybara (2.17.1)
+       rubocop (~> 1.41)
+     rubocop-performance (1.16.0)
+       rubocop (>= 1.7.0, < 2.0)
+       rubocop-ast (>= 0.4.0)
+     rubocop-rails (2.2.1)
+       rack (>= 1.1)
+       rubocop (>= 0.72.0)
+     rubocop-rake (0.6.0)
+       rubocop (~> 1.0)
+     rubocop-rspec (2.19.0)
+       rubocop (~> 1.33)
+       rubocop-capybara (~> 2.17)
+     ruby-progressbar (1.13.0)
+     thor (1.2.1)
+     unicode-display_width (2.4.2)
+
+ PLATFORMS
+   arm64-darwin-21
+
+ DEPENDENCIES
+   bundler-audit
+   gnar-style
+   llama-rb!
+   rake
+   rspec
+   rubocop-rake
+   rubocop-rspec
+
+ BUNDLED WITH
+    2.4.5
data/LICENSE ADDED
@@ -0,0 +1,21 @@
+ The MIT License (MIT)
+
+ Copyright (c) 2023
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,81 @@
+ # Llama-rb
+
+ Ruby wrapper for
+ [llama.cpp](https://github.com/ggerganov/llama.cpp).
+
+ This was hacked together in a weekend and versions `0.x.x` should be considered unstable.
+
+ ## Installation
+
+ Install the gem and add it to the application's Gemfile by executing:
+
+ ```
+ $ bundle add llama-rb
+ ```
+
+ If bundler is not being used to manage dependencies, install the gem by executing:
+
+ ```
+ $ gem install llama-rb
+ ```
+
+ ## Usage
+
+ ### Models
+
+ Before using this code, you will need to download and process at least one model. See
+ [ggerganov/llama.cpp](https://github.com/ggerganov/llama.cpp#obtaining-and-verifying-the-facebook-llama-original-model-and-stanford-alpaca-model-data).
+
+ ### Example
+
+ ```ruby
+ require 'llama'
+
+ m = Llama::Model.new('models/7B/ggml-model-q4_0.bin')
+ m.predict('hello world')
+ ```
+
+ ### API
+
+ #### Llama::Model.new
+
+ ```ruby
+ def self.new(
+   model,               # path to model file, e.g. "models/7B/ggml-model-q4_0.bin"
+   n_ctx: 512,          # context size
+   n_parts: -1,         # number of model parts (-1 = determine from model dimensions)
+   seed: Time.now.to_i, # RNG seed
+   memory_f16: true,    # use f16 instead of f32 for memory kv
+   use_mlock: false     # use mlock to keep model in memory
+ )
+ ```
+
+ #### Llama::Model#predict
+
+ ```ruby
+ def predict(
+   prompt,         # string used as prompt
+   n_predict: 128  # number of tokens to predict
+ )
+ ```
+
+ ## Development
+
+ ```
+ git clone --recurse-submodules https://github.com/zfletch/llama-rb
+ cd llama-rb
+ ./bin/setup
+ ```
+
+ After checking out the repo, run `bin/setup` to install dependencies.
+ Then, run `rake spec` to run the tests.
+ You can also run `bin/console` for an interactive prompt that will allow you to experiment.
+
+ To install this gem onto your local machine, run `bundle exec rake install`.
+ To release a new version, update the version number in `version.rb`, and then run
+ `bundle exec rake release`, which will create a git tag for the version, push git
+ commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
+
+ ## Contributing
+
+ Bug reports and pull requests are welcome on GitHub at https://github.com/zfletch/llama-rb.
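Pulling the signatures documented in the README above together, a slightly fuller usage sketch; the model path and option values below are illustrative assumptions, and only the keyword argument names come from the API section:

```ruby
require 'llama'

# Values are placeholders; only the keyword arguments are documented above.
model = Llama::Model.new(
  'models/7B/ggml-model-q4_0.bin',
  n_ctx: 512,        # context size
  n_parts: -1,       # determine the number of parts from the model
  seed: 42,          # fixed RNG seed for repeatable output
  memory_f16: true,  # f16 key/value memory
  use_mlock: false   # do not pin the model in RAM
)

puts model.predict('Building a Ruby wrapper for llama.cpp is', n_predict: 64)
```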
data/Rakefile ADDED
@@ -0,0 +1,10 @@
+ require 'bundler/gem_tasks'
+ require 'rspec/core/rake_task'
+
+ RSpec::Core::RakeTask.new(:spec)
+
+ require 'rubocop/rake_task'
+
+ RuboCop::RakeTask.new
+
+ task default: [:spec, :rubocop]
@@ -0,0 +1,311 @@
+ #include "common.h"
+
+ #include "ggml.h"
+
+ #include <cassert>
+ #include <cstring>
+ #include <fstream>
+ #include <string>
+ #include <iterator>
+ #include <algorithm>
+
+ #if defined(_MSC_VER) || defined(__MINGW32__)
+ #include <malloc.h> // using malloc.h with MSC/MINGW
+ #elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
+ #include <alloca.h>
+ #endif
+
+ #if defined (_WIN32)
+ #pragma comment(lib,"kernel32.lib")
+ extern "C" __declspec(dllimport) void* __stdcall GetStdHandle(unsigned long nStdHandle);
+ extern "C" __declspec(dllimport) int __stdcall GetConsoleMode(void* hConsoleHandle, unsigned long* lpMode);
+ extern "C" __declspec(dllimport) int __stdcall SetConsoleMode(void* hConsoleHandle, unsigned long dwMode);
+ extern "C" __declspec(dllimport) int __stdcall SetConsoleCP(unsigned int wCodePageID);
+ extern "C" __declspec(dllimport) int __stdcall SetConsoleOutputCP(unsigned int wCodePageID);
+ #endif
+
+ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
+     // determine sensible default number of threads.
+     // std::thread::hardware_concurrency may not be equal to the number of cores, or may return 0.
+ #ifdef __linux__
+     std::ifstream cpuinfo("/proc/cpuinfo");
+     params.n_threads = std::count(std::istream_iterator<std::string>(cpuinfo),
+                                   std::istream_iterator<std::string>(),
+                                   std::string("processor"));
+ #endif
+     if (params.n_threads == 0) {
+         params.n_threads = std::max(1, (int32_t) std::thread::hardware_concurrency());
+     }
+
+     bool invalid_param = false;
+     std::string arg;
+     for (int i = 1; i < argc; i++) {
+         arg = argv[i];
+
+         if (arg == "-s" || arg == "--seed") {
+             if (++i >= argc) {
+                 invalid_param = true;
+                 break;
+             }
+             params.seed = std::stoi(argv[i]);
+         } else if (arg == "-t" || arg == "--threads") {
+             if (++i >= argc) {
+                 invalid_param = true;
+                 break;
+             }
+             params.n_threads = std::stoi(argv[i]);
+         } else if (arg == "-p" || arg == "--prompt") {
+             if (++i >= argc) {
+                 invalid_param = true;
+                 break;
+             }
+             params.prompt = argv[i];
+         } else if (arg == "-f" || arg == "--file") {
+             if (++i >= argc) {
+                 invalid_param = true;
+                 break;
+             }
+             std::ifstream file(argv[i]);
+             std::copy(std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>(), back_inserter(params.prompt));
+             if (params.prompt.back() == '\n') {
+                 params.prompt.pop_back();
+             }
+         } else if (arg == "-n" || arg == "--n_predict") {
+             if (++i >= argc) {
+                 invalid_param = true;
+                 break;
+             }
+             params.n_predict = std::stoi(argv[i]);
+         } else if (arg == "--top_k") {
+             if (++i >= argc) {
+                 invalid_param = true;
+                 break;
+             }
+             params.top_k = std::stoi(argv[i]);
+         } else if (arg == "-c" || arg == "--ctx_size") {
+             if (++i >= argc) {
+                 invalid_param = true;
+                 break;
+             }
+             params.n_ctx = std::stoi(argv[i]);
+         } else if (arg == "--memory_f32") {
+             params.memory_f16 = false;
+         } else if (arg == "--top_p") {
+             if (++i >= argc) {
+                 invalid_param = true;
+                 break;
+             }
+             params.top_p = std::stof(argv[i]);
+         } else if (arg == "--temp") {
+             if (++i >= argc) {
+                 invalid_param = true;
+                 break;
+             }
+             params.temp = std::stof(argv[i]);
+         } else if (arg == "--repeat_last_n") {
+             if (++i >= argc) {
+                 invalid_param = true;
+                 break;
+             }
+             params.repeat_last_n = std::stoi(argv[i]);
+         } else if (arg == "--repeat_penalty") {
+             if (++i >= argc) {
+                 invalid_param = true;
+                 break;
+             }
+             params.repeat_penalty = std::stof(argv[i]);
+         } else if (arg == "-b" || arg == "--batch_size") {
+             if (++i >= argc) {
+                 invalid_param = true;
+                 break;
+             }
+             params.n_batch = std::stoi(argv[i]);
+             params.n_batch = std::min(512, params.n_batch);
+         } else if (arg == "--keep") {
+             if (++i >= argc) {
+                 invalid_param = true;
+                 break;
+             }
+             params.n_keep = std::stoi(argv[i]);
+         } else if (arg == "-m" || arg == "--model") {
+             if (++i >= argc) {
+                 invalid_param = true;
+                 break;
+             }
+             params.model = argv[i];
+         } else if (arg == "-i" || arg == "--interactive") {
+             params.interactive = true;
+         } else if (arg == "--embedding") {
+             params.embedding = true;
+         } else if (arg == "--interactive-start") {
+             params.interactive = true;
+         } else if (arg == "--interactive-first") {
+             params.interactive_start = true;
+         } else if (arg == "-ins" || arg == "--instruct") {
+             params.instruct = true;
+         } else if (arg == "--color") {
+             params.use_color = true;
+         } else if (arg == "--mlock") {
+             params.use_mlock = true;
+         } else if (arg == "--mtest") {
+             params.mem_test = true;
+         } else if (arg == "--verbose-prompt") {
+             params.verbose_prompt = true;
+         } else if (arg == "-r" || arg == "--reverse-prompt") {
+             if (++i >= argc) {
+                 invalid_param = true;
+                 break;
+             }
+             params.antiprompt.push_back(argv[i]);
+         } else if (arg == "--perplexity") {
+             params.perplexity = true;
+         } else if (arg == "--ignore-eos") {
+             params.ignore_eos = true;
+         } else if (arg == "--n_parts") {
+             if (++i >= argc) {
+                 invalid_param = true;
+                 break;
+             }
+             params.n_parts = std::stoi(argv[i]);
+         } else if (arg == "-h" || arg == "--help") {
+             gpt_print_usage(argc, argv, params);
+             exit(0);
+         } else if (arg == "--random-prompt") {
+             params.random_prompt = true;
+         } else if (arg == "--in-prefix") {
+             if (++i >= argc) {
+                 invalid_param = true;
+                 break;
+             }
+             params.input_prefix = argv[i];
+         } else {
+             fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
+             gpt_print_usage(argc, argv, params);
+             exit(1);
+         }
+     }
+     if (invalid_param) {
+         fprintf(stderr, "error: invalid parameter for argument: %s\n", arg.c_str());
+         gpt_print_usage(argc, argv, params);
+         exit(1);
+     }
+
+     return true;
+ }
+
+ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
+     fprintf(stderr, "usage: %s [options]\n", argv[0]);
+     fprintf(stderr, "\n");
+     fprintf(stderr, "options:\n");
+     fprintf(stderr, " -h, --help show this help message and exit\n");
+     fprintf(stderr, " -i, --interactive run in interactive mode\n");
+     fprintf(stderr, " --interactive-first run in interactive mode and wait for input right away\n");
+     fprintf(stderr, " -ins, --instruct run in instruction mode (use with Alpaca models)\n");
+     fprintf(stderr, " -r PROMPT, --reverse-prompt PROMPT\n");
+     fprintf(stderr, " run in interactive mode and poll user input upon seeing PROMPT (can be\n");
+     fprintf(stderr, " specified more than once for multiple prompts).\n");
+     fprintf(stderr, " --color colorise output to distinguish prompt and user input from generations\n");
+     fprintf(stderr, " -s SEED, --seed SEED RNG seed (default: -1, use random seed for <= 0)\n");
+     fprintf(stderr, " -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads);
+     fprintf(stderr, " -p PROMPT, --prompt PROMPT\n");
+     fprintf(stderr, " prompt to start generation with (default: empty)\n");
+     fprintf(stderr, " --random-prompt start with a randomized prompt.\n");
+     fprintf(stderr, " --in-prefix STRING string to prefix user inputs with (default: empty)\n");
+     fprintf(stderr, " -f FNAME, --file FNAME\n");
+     fprintf(stderr, " prompt file to start generation.\n");
+     fprintf(stderr, " -n N, --n_predict N number of tokens to predict (default: %d, -1 = infinity)\n", params.n_predict);
+     fprintf(stderr, " --top_k N top-k sampling (default: %d)\n", params.top_k);
+     fprintf(stderr, " --top_p N top-p sampling (default: %.1f)\n", (double)params.top_p);
+     fprintf(stderr, " --repeat_last_n N last n tokens to consider for penalize (default: %d)\n", params.repeat_last_n);
+     fprintf(stderr, " --repeat_penalty N penalize repeat sequence of tokens (default: %.1f)\n", (double)params.repeat_penalty);
+     fprintf(stderr, " -c N, --ctx_size N size of the prompt context (default: %d)\n", params.n_ctx);
+     fprintf(stderr, " --ignore-eos ignore end of stream token and continue generating\n");
+     fprintf(stderr, " --memory_f32 use f32 instead of f16 for memory key+value\n");
+     fprintf(stderr, " --temp N temperature (default: %.1f)\n", (double)params.temp);
+     fprintf(stderr, " --n_parts N number of model parts (default: -1 = determine from dimensions)\n");
+     fprintf(stderr, " -b N, --batch_size N batch size for prompt processing (default: %d)\n", params.n_batch);
+     fprintf(stderr, " --perplexity compute perplexity over the prompt\n");
+     fprintf(stderr, " --keep number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep);
+     if (ggml_mlock_supported()) {
+         fprintf(stderr, " --mlock force system to keep model in RAM rather than swapping or compressing\n");
+     }
+     fprintf(stderr, " --mtest compute maximum memory usage\n");
+     fprintf(stderr, " --verbose-prompt print prompt before generation\n");
+     fprintf(stderr, " -m FNAME, --model FNAME\n");
+     fprintf(stderr, " model path (default: %s)\n", params.model.c_str());
+     fprintf(stderr, "\n");
+ }
+
+ std::string gpt_random_prompt(std::mt19937 & rng) {
+     const int r = rng() % 10;
+     switch (r) {
+         case 0: return "So";
+         case 1: return "Once upon a time";
+         case 2: return "When";
+         case 3: return "The";
+         case 4: return "After";
+         case 5: return "If";
+         case 6: return "import";
+         case 7: return "He";
+         case 8: return "She";
+         case 9: return "They";
+         default: return "To";
+     }
+
+     return "The";
+ }
+
+ // TODO: not great allocating this every time
+ std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos) {
+     // initialize to prompt number of chars, since n_tokens <= n_prompt_chars
+     std::vector<llama_token> res(text.size() + (int)add_bos);
+     int n = llama_tokenize(ctx, text.c_str(), res.data(), res.size(), add_bos);
+     assert(n >= 0);
+     res.resize(n);
+
+     return res;
+ }
+
+ /* Keep track of current color of output, and emit ANSI code if it changes. */
+ void set_console_color(console_state & con_st, console_color_t color) {
+     if (con_st.use_color && con_st.color != color) {
+         switch(color) {
+             case CONSOLE_COLOR_DEFAULT:
+                 printf(ANSI_COLOR_RESET);
+                 break;
+             case CONSOLE_COLOR_PROMPT:
+                 printf(ANSI_COLOR_YELLOW);
+                 break;
+             case CONSOLE_COLOR_USER_INPUT:
+                 printf(ANSI_BOLD ANSI_COLOR_GREEN);
+                 break;
+         }
+         con_st.color = color;
+     }
+ }
+
+ #if defined (_WIN32)
+ void win32_console_init(bool enable_color) {
+     unsigned long dwMode = 0;
+     void* hConOut = GetStdHandle((unsigned long)-11); // STD_OUTPUT_HANDLE (-11)
+     if (!hConOut || hConOut == (void*)-1 || !GetConsoleMode(hConOut, &dwMode)) {
+         hConOut = GetStdHandle((unsigned long)-12); // STD_ERROR_HANDLE (-12)
+         if (hConOut && (hConOut == (void*)-1 || !GetConsoleMode(hConOut, &dwMode))) {
+             hConOut = 0;
+         }
+     }
+     if (hConOut) {
+         // Enable ANSI colors on Windows 10+
+         if (enable_color && !(dwMode & 0x4)) {
+             SetConsoleMode(hConOut, dwMode | 0x4); // ENABLE_VIRTUAL_TERMINAL_PROCESSING (0x4)
+         }
+         // Set console output codepage to UTF8
+         SetConsoleOutputCP(65001); // CP_UTF8
+     }
+     void* hConIn = GetStdHandle((unsigned long)-10); // STD_INPUT_HANDLE (-10)
+     if (hConIn && hConIn != (void*)-1 && GetConsoleMode(hConIn, &dwMode)) {
+         // Set console input codepage to UTF8
+         SetConsoleCP(65001); // CP_UTF8
+     }
+ }
+ #endif
@@ -0,0 +1,95 @@
+ // Various helper functions and utilities
+
+ #pragma once
+
+ #include "llama.h"
+
+ #include <string>
+ #include <vector>
+ #include <random>
+ #include <thread>
+
+ //
+ // CLI argument parsing
+ //
+
+ struct gpt_params {
+     int32_t seed = -1; // RNG seed
+     int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
+     int32_t n_predict = 128; // new tokens to predict
+     int32_t repeat_last_n = 64; // last n tokens to penalize
+     int32_t n_parts = -1; // amount of model parts (-1 = determine from model dimensions)
+     int32_t n_ctx = 512; // context size
+     int32_t n_batch = 8; // batch size for prompt processing
+     int32_t n_keep = 0; // number of tokens to keep from initial prompt
+
+     // sampling parameters
+     int32_t top_k = 40;
+     float top_p = 0.95f;
+     float temp = 0.80f;
+     float repeat_penalty = 1.10f;
+
+     std::string model = "models/lamma-7B/ggml-model.bin"; // model path
+     std::string prompt = "";
+     std::string input_prefix = ""; // string to prefix user inputs with
+
+
+     std::vector<std::string> antiprompt; // string upon seeing which more user input is prompted
+
+     bool memory_f16 = true; // use f16 instead of f32 for memory kv
+     bool random_prompt = false; // do not randomize prompt if none provided
+     bool use_color = false; // use color to distinguish generations and inputs
+     bool interactive = false; // interactive mode
+
+     bool embedding = false; // get only sentence embedding
+     bool interactive_start = false; // wait for user input immediately
+
+     bool instruct = false; // instruction mode (used for Alpaca models)
+     bool ignore_eos = false; // do not stop generating after eos
+     bool perplexity = false; // compute perplexity over the prompt
+     bool use_mlock = false; // use mlock to keep model in memory
+     bool mem_test = false; // compute maximum memory usage
+     bool verbose_prompt = false; // print prompt tokens before generation
+ };
+
+ bool gpt_params_parse(int argc, char ** argv, gpt_params & params);
+
+ void gpt_print_usage(int argc, char ** argv, const gpt_params & params);
+
+ std::string gpt_random_prompt(std::mt19937 & rng);
+
+ //
+ // Vocab utils
+ //
+
+ std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos);
+
+ //
+ // Console utils
+ //
+
+ #define ANSI_COLOR_RED "\x1b[31m"
+ #define ANSI_COLOR_GREEN "\x1b[32m"
+ #define ANSI_COLOR_YELLOW "\x1b[33m"
+ #define ANSI_COLOR_BLUE "\x1b[34m"
+ #define ANSI_COLOR_MAGENTA "\x1b[35m"
+ #define ANSI_COLOR_CYAN "\x1b[36m"
+ #define ANSI_COLOR_RESET "\x1b[0m"
+ #define ANSI_BOLD "\x1b[1m"
+
+ enum console_color_t {
+     CONSOLE_COLOR_DEFAULT=0,
+     CONSOLE_COLOR_PROMPT,
+     CONSOLE_COLOR_USER_INPUT
+ };
+
+ struct console_state {
+     bool use_color = false;
+     console_color_t color = CONSOLE_COLOR_DEFAULT;
+ };
+
+ void set_console_color(console_state & con_st, console_color_t color);
+
+ #if defined (_WIN32)
+ void win32_console_init(bool enable_color);
+ #endif
@@ -0,0 +1,12 @@
+ require 'mkmf-rice'
+
+ # Compile llama.cpp
+ # root = File.expand_path(File.join(File.dirname(__FILE__), "..", ".."))
+ # llama_cpp = File.join(root, 'llama.cpp')
+ #
+ # Dir.chdir(llama_cpp) do
+ #   system("make", exception: true)
+ # end
+
+ # Create Makefile for Ruby bindings
+ create_makefile 'llama/model'