llama-rb 0.1.0
- checksums.yaml +7 -0
- data/Gemfile +10 -0
- data/Gemfile.lock +85 -0
- data/LICENSE +21 -0
- data/README.md +81 -0
- data/Rakefile +10 -0
- data/ext/llama/common.cpp +311 -0
- data/ext/llama/common.h +95 -0
- data/ext/llama/extconf.rb +12 -0
- data/ext/llama/ggml.c +10642 -0
- data/ext/llama/ggml.h +778 -0
- data/ext/llama/llama.cpp +1815 -0
- data/ext/llama/llama.h +152 -0
- data/ext/llama/model.cpp +192 -0
- data/lib/llama/model.rb +86 -0
- data/lib/llama/version.rb +3 -0
- data/lib/llama.rb +6 -0
- data/llama-rb.gemspec +50 -0
- data/models/.gitkeep +0 -0
- metadata +80 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
---
SHA256:
  metadata.gz: ca78f6c05c53323ba5bd78ccdee815a77c4df10fde7c5497563e48281949cc3e
  data.tar.gz: 7e225474cc183d2e50f3936d5bee984d394708ff7c48ad040b8b629c6f21fbb9
SHA512:
  metadata.gz: ea82a87539c0511175c6c5afe3c93e6bc5c141ea27bc4af0a4c9c9a8574736de59169bd8d847ca3afd385f27aeb306944f27a4e822233b54f3f47033be92d5ed
  data.tar.gz: ca92bfd00bea78d88d90c93418a7cf86e9b6a3b436b86f6a7c87cf1906fed59a539085f549a0cdbaa0bf16815c3a221b51ce23313fab8adb6a6310a75fbbe8f5
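For reference, these digests can be checked against a locally downloaded copy of the gem with Ruby's standard `digest` library. A minimal sketch, assuming the `.gem` archive (itself a tar containing `metadata.gz` and `data.tar.gz`) has been unpacked into the current directory:

```ruby
require 'digest'

# Print SHA256 digests for the two artifacts named in checksums.yaml;
# compare the output against the values published above.
%w[metadata.gz data.tar.gz].each do |artifact|
  puts "#{artifact}: #{Digest::SHA256.file(artifact).hexdigest}"
end
```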
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,85 @@
PATH
  remote: .
  specs:
    llama-rb (0.1.0)
      rice (~> 4.0.4)

GEM
  remote: https://rubygems.org/
  specs:
    ast (2.4.2)
    bundler-audit (0.9.1)
      bundler (>= 1.2.0, < 3)
      thor (~> 1.0)
    diff-lcs (1.5.0)
    gnar-style (0.13.0)
      rubocop (>= 1.0.0, < 2.0)
      rubocop-performance
      rubocop-rails (~> 2.2.0)
      thor
    json (2.6.3)
    parallel (1.22.1)
    parser (3.2.2.0)
      ast (~> 2.4.1)
    rack (3.0.7)
    rainbow (3.1.1)
    rake (13.0.6)
    regexp_parser (2.7.0)
    rexml (3.2.5)
    rice (4.0.4)
    rspec (3.12.0)
      rspec-core (~> 3.12.0)
      rspec-expectations (~> 3.12.0)
      rspec-mocks (~> 3.12.0)
    rspec-core (3.12.1)
      rspec-support (~> 3.12.0)
    rspec-expectations (3.12.2)
      diff-lcs (>= 1.2.0, < 2.0)
      rspec-support (~> 3.12.0)
    rspec-mocks (3.12.5)
      diff-lcs (>= 1.2.0, < 2.0)
      rspec-support (~> 3.12.0)
    rspec-support (3.12.0)
    rubocop (1.48.1)
      json (~> 2.3)
      parallel (~> 1.10)
      parser (>= 3.2.0.0)
      rainbow (>= 2.2.2, < 4.0)
      regexp_parser (>= 1.8, < 3.0)
      rexml (>= 3.2.5, < 4.0)
      rubocop-ast (>= 1.26.0, < 2.0)
      ruby-progressbar (~> 1.7)
      unicode-display_width (>= 2.4.0, < 3.0)
    rubocop-ast (1.28.0)
      parser (>= 3.2.1.0)
    rubocop-capybara (2.17.1)
      rubocop (~> 1.41)
    rubocop-performance (1.16.0)
      rubocop (>= 1.7.0, < 2.0)
      rubocop-ast (>= 0.4.0)
    rubocop-rails (2.2.1)
      rack (>= 1.1)
      rubocop (>= 0.72.0)
    rubocop-rake (0.6.0)
      rubocop (~> 1.0)
    rubocop-rspec (2.19.0)
      rubocop (~> 1.33)
      rubocop-capybara (~> 2.17)
    ruby-progressbar (1.13.0)
    thor (1.2.1)
    unicode-display_width (2.4.2)

PLATFORMS
  arm64-darwin-21

DEPENDENCIES
  bundler-audit
  gnar-style
  llama-rb!
  rake
  rspec
  rubocop-rake
  rubocop-rspec

BUNDLED WITH
   2.4.5
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
The MIT License (MIT)

Copyright (c) 2023

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
data/README.md
ADDED
@@ -0,0 +1,81 @@
# Llama-rb

Ruby wrapper for [llama.cpp](https://github.com/ggerganov/llama.cpp).

This gem was hacked together in a weekend, and versions `0.x.x` should be considered unstable.

## Installation

Install the gem and add it to the application's Gemfile by executing:

```
$ bundle add llama-rb
```

If bundler is not being used to manage dependencies, install the gem by executing:

```
$ gem install llama-rb
```

## Usage

### Models

Before using this code, you will need to download and process at least one model. See
[ggerganov/llama.cpp](https://github.com/ggerganov/llama.cpp#obtaining-and-verifying-the-facebook-llama-original-model-and-stanford-alpaca-model-data).
### Example

```ruby
require 'llama'

m = Llama::Model.new('models/7B/ggml-model-q4_0.bin')
m.predict('hello world')
```

### API

#### Llama::Model.new

```ruby
def self.new(
  model,               # path to model file, e.g. "models/7B/ggml-model-q4_0.bin"
  n_ctx: 512,          # context size
  n_parts: -1,         # number of model parts (-1 = determine from model dimensions)
  seed: Time.now.to_i, # RNG seed
  memory_f16: true,    # use f16 instead of f32 for the KV memory
  use_mlock: false     # use mlock to keep model in memory
)
```

#### Llama::Model#predict

```ruby
def predict(
  prompt,        # string used as the prompt
  n_predict: 128 # number of tokens to predict
)
```
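
Putting the two calls together, a minimal sketch based only on the signatures documented above; the model path and the fixed seed are illustrative:

```ruby
require 'llama'

# Path and seed are illustrative; any converted ggml model file works here.
model = Llama::Model.new(
  'models/7B/ggml-model-q4_0.bin',
  n_ctx: 512, # default shown explicitly
  seed: 42    # pin the RNG for repeatable output
)

puts model.predict('Once upon a time', n_predict: 64)
```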
## Development

```
git clone --recurse-submodules https://github.com/zfletch/llama-rb
cd llama-rb
./bin/setup
```

After checking out the repo, run `bin/setup` to install dependencies.
Then, run `rake spec` to run the tests.
You can also run `bin/console` for an interactive prompt that will allow you to experiment.

To install this gem onto your local machine, run `bundle exec rake install`.
To release a new version, update the version number in `version.rb`, and then run
`bundle exec rake release`, which will create a git tag for the version, push git
commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).

## Contributing

Bug reports and pull requests are welcome on GitHub at https://github.com/zfletch/llama-rb.
data/Rakefile
ADDED
data/ext/llama/common.cpp
ADDED
@@ -0,0 +1,311 @@
#include "common.h"

#include "ggml.h"

#include <cassert>
#include <cstring>
#include <fstream>
#include <string>
#include <iterator>
#include <algorithm>

#if defined(_MSC_VER) || defined(__MINGW32__)
#include <malloc.h> // using malloc.h with MSC/MINGW
#elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
#include <alloca.h>
#endif

#if defined (_WIN32)
#pragma comment(lib,"kernel32.lib")
extern "C" __declspec(dllimport) void* __stdcall GetStdHandle(unsigned long nStdHandle);
extern "C" __declspec(dllimport) int __stdcall GetConsoleMode(void* hConsoleHandle, unsigned long* lpMode);
extern "C" __declspec(dllimport) int __stdcall SetConsoleMode(void* hConsoleHandle, unsigned long dwMode);
extern "C" __declspec(dllimport) int __stdcall SetConsoleCP(unsigned int wCodePageID);
extern "C" __declspec(dllimport) int __stdcall SetConsoleOutputCP(unsigned int wCodePageID);
#endif

bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
    // determine sensible default number of threads.
    // std::thread::hardware_concurrency may not be equal to the number of cores, or may return 0.
#ifdef __linux__
    std::ifstream cpuinfo("/proc/cpuinfo");
    params.n_threads = std::count(std::istream_iterator<std::string>(cpuinfo),
                                  std::istream_iterator<std::string>(),
                                  std::string("processor"));
#endif
    if (params.n_threads == 0) {
        params.n_threads = std::max(1, (int32_t) std::thread::hardware_concurrency());
    }

    bool invalid_param = false;
    std::string arg;
    for (int i = 1; i < argc; i++) {
        arg = argv[i];

        if (arg == "-s" || arg == "--seed") {
            if (++i >= argc) {
                invalid_param = true;
                break;
            }
            params.seed = std::stoi(argv[i]);
        } else if (arg == "-t" || arg == "--threads") {
            if (++i >= argc) {
                invalid_param = true;
                break;
            }
            params.n_threads = std::stoi(argv[i]);
        } else if (arg == "-p" || arg == "--prompt") {
            if (++i >= argc) {
                invalid_param = true;
                break;
            }
            params.prompt = argv[i];
        } else if (arg == "-f" || arg == "--file") {
            if (++i >= argc) {
                invalid_param = true;
                break;
            }
            std::ifstream file(argv[i]);
            std::copy(std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>(), back_inserter(params.prompt));
            if (params.prompt.back() == '\n') {
                params.prompt.pop_back();
            }
        } else if (arg == "-n" || arg == "--n_predict") {
            if (++i >= argc) {
                invalid_param = true;
                break;
            }
            params.n_predict = std::stoi(argv[i]);
        } else if (arg == "--top_k") {
            if (++i >= argc) {
                invalid_param = true;
                break;
            }
            params.top_k = std::stoi(argv[i]);
        } else if (arg == "-c" || arg == "--ctx_size") {
            if (++i >= argc) {
                invalid_param = true;
                break;
            }
            params.n_ctx = std::stoi(argv[i]);
        } else if (arg == "--memory_f32") {
            params.memory_f16 = false;
        } else if (arg == "--top_p") {
            if (++i >= argc) {
                invalid_param = true;
                break;
            }
            params.top_p = std::stof(argv[i]);
        } else if (arg == "--temp") {
            if (++i >= argc) {
                invalid_param = true;
                break;
            }
            params.temp = std::stof(argv[i]);
        } else if (arg == "--repeat_last_n") {
            if (++i >= argc) {
                invalid_param = true;
                break;
            }
            params.repeat_last_n = std::stoi(argv[i]);
        } else if (arg == "--repeat_penalty") {
            if (++i >= argc) {
                invalid_param = true;
                break;
            }
            params.repeat_penalty = std::stof(argv[i]);
        } else if (arg == "-b" || arg == "--batch_size") {
            if (++i >= argc) {
                invalid_param = true;
                break;
            }
            params.n_batch = std::stoi(argv[i]);
            params.n_batch = std::min(512, params.n_batch);
        } else if (arg == "--keep") {
            if (++i >= argc) {
                invalid_param = true;
                break;
            }
            params.n_keep = std::stoi(argv[i]);
        } else if (arg == "-m" || arg == "--model") {
            if (++i >= argc) {
                invalid_param = true;
                break;
            }
            params.model = argv[i];
        } else if (arg == "-i" || arg == "--interactive") {
            params.interactive = true;
        } else if (arg == "--embedding") {
            params.embedding = true;
        } else if (arg == "--interactive-start") {
            params.interactive = true;
        } else if (arg == "--interactive-first") {
            params.interactive_start = true;
        } else if (arg == "-ins" || arg == "--instruct") {
            params.instruct = true;
        } else if (arg == "--color") {
            params.use_color = true;
        } else if (arg == "--mlock") {
            params.use_mlock = true;
        } else if (arg == "--mtest") {
            params.mem_test = true;
        } else if (arg == "--verbose-prompt") {
            params.verbose_prompt = true;
        } else if (arg == "-r" || arg == "--reverse-prompt") {
            if (++i >= argc) {
                invalid_param = true;
                break;
            }
            params.antiprompt.push_back(argv[i]);
        } else if (arg == "--perplexity") {
            params.perplexity = true;
        } else if (arg == "--ignore-eos") {
            params.ignore_eos = true;
        } else if (arg == "--n_parts") {
            if (++i >= argc) {
                invalid_param = true;
                break;
            }
            params.n_parts = std::stoi(argv[i]);
        } else if (arg == "-h" || arg == "--help") {
            gpt_print_usage(argc, argv, params);
            exit(0);
        } else if (arg == "--random-prompt") {
            params.random_prompt = true;
        } else if (arg == "--in-prefix") {
            if (++i >= argc) {
                invalid_param = true;
                break;
            }
            params.input_prefix = argv[i];
        } else {
            fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
            gpt_print_usage(argc, argv, params);
            exit(1);
        }
    }
    if (invalid_param) {
        fprintf(stderr, "error: invalid parameter for argument: %s\n", arg.c_str());
        gpt_print_usage(argc, argv, params);
        exit(1);
    }

    return true;
}

void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
    fprintf(stderr, "usage: %s [options]\n", argv[0]);
    fprintf(stderr, "\n");
    fprintf(stderr, "options:\n");
    fprintf(stderr, "  -h, --help            show this help message and exit\n");
    fprintf(stderr, "  -i, --interactive     run in interactive mode\n");
    fprintf(stderr, "  --interactive-first   run in interactive mode and wait for input right away\n");
    fprintf(stderr, "  -ins, --instruct      run in instruction mode (use with Alpaca models)\n");
    fprintf(stderr, "  -r PROMPT, --reverse-prompt PROMPT\n");
    fprintf(stderr, "                        run in interactive mode and poll user input upon seeing PROMPT (can be\n");
    fprintf(stderr, "                        specified more than once for multiple prompts).\n");
    fprintf(stderr, "  --color               colorise output to distinguish prompt and user input from generations\n");
    fprintf(stderr, "  -s SEED, --seed SEED  RNG seed (default: -1, use random seed for <= 0)\n");
    fprintf(stderr, "  -t N, --threads N     number of threads to use during computation (default: %d)\n", params.n_threads);
    fprintf(stderr, "  -p PROMPT, --prompt PROMPT\n");
    fprintf(stderr, "                        prompt to start generation with (default: empty)\n");
    fprintf(stderr, "  --random-prompt       start with a randomized prompt.\n");
    fprintf(stderr, "  --in-prefix STRING    string to prefix user inputs with (default: empty)\n");
    fprintf(stderr, "  -f FNAME, --file FNAME\n");
    fprintf(stderr, "                        prompt file to start generation.\n");
    fprintf(stderr, "  -n N, --n_predict N   number of tokens to predict (default: %d, -1 = infinity)\n", params.n_predict);
    fprintf(stderr, "  --top_k N             top-k sampling (default: %d)\n", params.top_k);
    fprintf(stderr, "  --top_p N             top-p sampling (default: %.1f)\n", (double)params.top_p);
    fprintf(stderr, "  --repeat_last_n N     last n tokens to consider for penalize (default: %d)\n", params.repeat_last_n);
    fprintf(stderr, "  --repeat_penalty N    penalize repeat sequence of tokens (default: %.1f)\n", (double)params.repeat_penalty);
    fprintf(stderr, "  -c N, --ctx_size N    size of the prompt context (default: %d)\n", params.n_ctx);
    fprintf(stderr, "  --ignore-eos          ignore end of stream token and continue generating\n");
    fprintf(stderr, "  --memory_f32          use f32 instead of f16 for memory key+value\n");
    fprintf(stderr, "  --temp N              temperature (default: %.1f)\n", (double)params.temp);
    fprintf(stderr, "  --n_parts N           number of model parts (default: -1 = determine from dimensions)\n");
    fprintf(stderr, "  -b N, --batch_size N  batch size for prompt processing (default: %d)\n", params.n_batch);
    fprintf(stderr, "  --perplexity          compute perplexity over the prompt\n");
    fprintf(stderr, "  --keep                number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep);
    if (ggml_mlock_supported()) {
        fprintf(stderr, "  --mlock               force system to keep model in RAM rather than swapping or compressing\n");
    }
    fprintf(stderr, "  --mtest               compute maximum memory usage\n");
    fprintf(stderr, "  --verbose-prompt      print prompt before generation\n");
    fprintf(stderr, "  -m FNAME, --model FNAME\n");
    fprintf(stderr, "                        model path (default: %s)\n", params.model.c_str());
    fprintf(stderr, "\n");
}

std::string gpt_random_prompt(std::mt19937 & rng) {
    const int r = rng() % 10;
    switch (r) {
        case 0: return "So";
        case 1: return "Once upon a time";
        case 2: return "When";
        case 3: return "The";
        case 4: return "After";
        case 5: return "If";
        case 6: return "import";
        case 7: return "He";
        case 8: return "She";
        case 9: return "They";
        default: return "To";
    }

    return "The";
}

// TODO: not great allocating this every time
std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos) {
    // initialize to prompt number of chars, since n_tokens <= n_prompt_chars
    std::vector<llama_token> res(text.size() + (int)add_bos);
    int n = llama_tokenize(ctx, text.c_str(), res.data(), res.size(), add_bos);
    assert(n >= 0);
    res.resize(n);

    return res;
}

/* Keep track of current color of output, and emit ANSI code if it changes. */
void set_console_color(console_state & con_st, console_color_t color) {
    if (con_st.use_color && con_st.color != color) {
        switch(color) {
            case CONSOLE_COLOR_DEFAULT:
                printf(ANSI_COLOR_RESET);
                break;
            case CONSOLE_COLOR_PROMPT:
                printf(ANSI_COLOR_YELLOW);
                break;
            case CONSOLE_COLOR_USER_INPUT:
                printf(ANSI_BOLD ANSI_COLOR_GREEN);
                break;
        }
        con_st.color = color;
    }
}

#if defined (_WIN32)
void win32_console_init(bool enable_color) {
    unsigned long dwMode = 0;
    void* hConOut = GetStdHandle((unsigned long)-11); // STD_OUTPUT_HANDLE (-11)
    if (!hConOut || hConOut == (void*)-1 || !GetConsoleMode(hConOut, &dwMode)) {
        hConOut = GetStdHandle((unsigned long)-12); // STD_ERROR_HANDLE (-12)
        if (hConOut && (hConOut == (void*)-1 || !GetConsoleMode(hConOut, &dwMode))) {
            hConOut = 0;
        }
    }
    if (hConOut) {
        // Enable ANSI colors on Windows 10+
        if (enable_color && !(dwMode & 0x4)) {
            SetConsoleMode(hConOut, dwMode | 0x4); // ENABLE_VIRTUAL_TERMINAL_PROCESSING (0x4)
        }
        // Set console output codepage to UTF8
        SetConsoleOutputCP(65001); // CP_UTF8
    }
    void* hConIn = GetStdHandle((unsigned long)-10); // STD_INPUT_HANDLE (-10)
    if (hConIn && hConIn != (void*)-1 && GetConsoleMode(hConIn, &dwMode)) {
        // Set console input codepage to UTF8
        SetConsoleCP(65001); // CP_UTF8
    }
}
#endif
data/ext/llama/common.h
ADDED
@@ -0,0 +1,95 @@
// Various helper functions and utilities

#pragma once

#include "llama.h"

#include <string>
#include <vector>
#include <random>
#include <thread>

//
// CLI argument parsing
//

struct gpt_params {
    int32_t seed          = -1;  // RNG seed
    int32_t n_threads     = std::min(4, (int32_t) std::thread::hardware_concurrency());
    int32_t n_predict     = 128; // new tokens to predict
    int32_t repeat_last_n = 64;  // last n tokens to penalize
    int32_t n_parts       = -1;  // amount of model parts (-1 = determine from model dimensions)
    int32_t n_ctx         = 512; // context size
    int32_t n_batch       = 8;   // batch size for prompt processing
    int32_t n_keep        = 0;   // number of tokens to keep from initial prompt

    // sampling parameters
    int32_t top_k = 40;
    float   top_p = 0.95f;
    float   temp  = 0.80f;
    float   repeat_penalty = 1.10f;

    std::string model = "models/lamma-7B/ggml-model.bin"; // model path
    std::string prompt = "";
    std::string input_prefix = ""; // string to prefix user inputs with


    std::vector<std::string> antiprompt; // string upon seeing which more user input is prompted

    bool memory_f16    = true;  // use f16 instead of f32 for memory kv
    bool random_prompt = false; // do not randomize prompt if none provided
    bool use_color     = false; // use color to distinguish generations and inputs
    bool interactive   = false; // interactive mode

    bool embedding         = false; // get only sentence embedding
    bool interactive_start = false; // wait for user input immediately

    bool instruct       = false; // instruction mode (used for Alpaca models)
    bool ignore_eos     = false; // do not stop generating after eos
    bool perplexity     = false; // compute perplexity over the prompt
    bool use_mlock      = false; // use mlock to keep model in memory
    bool mem_test       = false; // compute maximum memory usage
    bool verbose_prompt = false; // print prompt tokens before generation
};

bool gpt_params_parse(int argc, char ** argv, gpt_params & params);

void gpt_print_usage(int argc, char ** argv, const gpt_params & params);

std::string gpt_random_prompt(std::mt19937 & rng);

//
// Vocab utils
//

std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos);

//
// Console utils
//

#define ANSI_COLOR_RED     "\x1b[31m"
#define ANSI_COLOR_GREEN   "\x1b[32m"
#define ANSI_COLOR_YELLOW  "\x1b[33m"
#define ANSI_COLOR_BLUE    "\x1b[34m"
#define ANSI_COLOR_MAGENTA "\x1b[35m"
#define ANSI_COLOR_CYAN    "\x1b[36m"
#define ANSI_COLOR_RESET   "\x1b[0m"
#define ANSI_BOLD          "\x1b[1m"

enum console_color_t {
    CONSOLE_COLOR_DEFAULT=0,
    CONSOLE_COLOR_PROMPT,
    CONSOLE_COLOR_USER_INPUT
};

struct console_state {
    bool use_color = false;
    console_color_t color = CONSOLE_COLOR_DEFAULT;
};

void set_console_color(console_state & con_st, console_color_t color);

#if defined (_WIN32)
void win32_console_init(bool enable_color);
#endif
data/ext/llama/extconf.rb
ADDED
@@ -0,0 +1,12 @@
require 'mkmf-rice'

# Compile llama.cpp
# root = File.expand_path(File.join(File.dirname(__FILE__), "..", ".."))
# llama_cpp = File.join(root, 'llama.cpp')
#
# Dir.chdir(llama_cpp) do
#   system("make", exception: true)
# end

# Create Makefile for Ruby bindings
create_makefile 'llama/model'
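
When the gem is installed, RubyGems runs this `extconf.rb` to generate a Makefile and then invokes `make`. The same steps can be reproduced by hand when hacking on the extension; a rough sketch, assuming a checkout with the llama.cpp submodule initialized:

```ruby
# Illustrative reproduction of what `gem install` does for this extension.
Dir.chdir('ext/llama') do
  system('ruby', 'extconf.rb', exception: true) # mkmf-rice writes the Makefile
  system('make', exception: true)               # builds llama/model.so (or .bundle)
end
```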
|