llama_cpp 0.2.1 → 0.2.2
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/examples/README.md +32 -0
- data/examples/embedding.rb +37 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +553 -313
- data/ext/llama_cpp/src/ggml-metal.h +4 -1
- data/ext/llama_cpp/src/ggml-metal.m +157 -19
- data/ext/llama_cpp/src/ggml-metal.metal +149 -0
- data/ext/llama_cpp/src/ggml-opencl.cpp +493 -4
- data/ext/llama_cpp/src/ggml.c +736 -98
- data/ext/llama_cpp/src/ggml.h +140 -9
- data/ext/llama_cpp/src/llama.cpp +58 -31
- data/ext/llama_cpp/src/llama.h +8 -9
- data/lib/llama_cpp/version.rb +2 -2
- metadata +3 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: e5e221d4831be790a990b121e6ac780d10b4cbfb85b2a9b4284d9c216f6e5604
+  data.tar.gz: fba76ac1a70bfd7b02b8d123c57e4c8096a29ac7f658bb090cda91c6a54752d2
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 994029383219077e134d170177954251c20ede6d1c83843ecd22c42eeae83584079d124b41702f55add7f3f237e9bdb14382fbd37dde2d0e74f8cffcfed1715b
+  data.tar.gz: ca4e94b6ddf4e4e9ddabbb2b8309cf4b2b06a881df09fdf4ad96e27c4f1f620ca0024ac46f69d9b474849c074a5c9ba9b0440777a0b52a12413bc356457a02f3
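These checksums cover the two archives packed inside the `.gem` file. A minimal sketch of checking one of them locally, assuming the gem has been unpacked so that `data.tar.gz` sits in the current directory (the filename and location are assumptions, not part of this release):

```ruby
require 'digest'

# Compare the new data.tar.gz digest against the value recorded in checksums.yaml.
expected = 'fba76ac1a70bfd7b02b8d123c57e4c8096a29ac7f658bb090cda91c6a54752d2'
actual = Digest::SHA256.file('data.tar.gz').hexdigest
puts(actual == expected ? 'data.tar.gz: OK' : 'data.tar.gz: MISMATCH')
```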
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
+## [[0.2.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.2.1...v0.2.2)] - 2023-06-24
+
+- Bump bundled llama.cpp from master-a09f919 to master-7487137.
+
 ## [[0.2.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.2.0...v0.2.1)] - 2023-06-17
 
 - Bump bundled llama.cpp from master-4de0334 to master-a09f919.
data/examples/README.md CHANGED
@@ -58,3 +58,35 @@ Options:
 
 Start chat
 ```
+
+## embedding.rb
+
+### Usage
+
+```sh
+$ cd examples
+$ gem install llama_cpp thor
+$ ./embedding.rb -m /path/to/quantized-model.bin -t 4 -p 'Hello, World.'
+...
+0.7191136479377747 0.5564611554145813 1.4210394620895386 -1.4874695539474487
+```
+
+### Options
+
+```
+$ ./embedding.rb help main
+Usage:
+  embedding.rb main -m, --model=MODEL -p, --prompt=PROMPT
+
+Options:
+  -s, [--seed=N]           # random seed
+                           # Default: -1
+  -t, [--threads=N]        # number of threads
+                           # Default: 2
+  -m, --model=MODEL        # path to model file
+  -p, --prompt=PROMPT      # prompt to generate embedding
+      [--n-gpu-layers=N]   # number of layers on GPU
+                           # Default: 0
+
+Extract embedding from prompt
+```
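The usage section above shows embedding.rb printing the embedding as space-separated floats on stdout. A minimal sketch of consuming that output from another Ruby process, assuming the informational `...` lines go to stderr as with llama.cpp itself (the model path and prompt are placeholders):

```ruby
# Run the example script and parse its stdout back into a Float vector.
raw = `./embedding.rb -m /path/to/quantized-model.bin -p 'Hello, World.'`
vector = raw.split.map(&:to_f)
puts "dimensions: #{vector.size}"
```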
data/examples/embedding.rb ADDED
@@ -0,0 +1,37 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+
+# embedding.rb extracts embedding from prompt.
+# It is created with reference to embedding.cpp in llama.cpp examples:
+# - https://github.com/ggerganov/llama.cpp/blob/master/examples/embedding/embedding.cpp
+
+require 'llama_cpp'
+require 'thor'
+
+class Embedding < Thor # rubocop:disable Style/Documentation
+  default_command :main
+  desc 'main', 'Extract embedding from prompt'
+  option :seed, type: :numeric, aliases: '-s', desc: 'random seed', default: -1
+  option :threads, type: :numeric, aliases: '-t', desc: 'number of threads', default: 2
+  option :model, type: :string, aliases: '-m', desc: 'path to model file', required: true
+  option :prompt, type: :string, aliases: '-p', desc: 'prompt to generate embedding', required: true
+  option :n_gpu_layers, type: :numeric, desc: 'number of layers on GPU', default: 0
+  def main # rubocop:disable Metrics/AbcSize
+    params = LLaMACpp::ContextParams.new
+    params.seed = options[:seed]
+    params.n_gpu_layers = options[:n_gpu_layers]
+    params.embedding = true
+    context = LLaMACpp::Context.new(model_path: options[:model], params: params)
+
+    embd_input = context.tokenize(text: options[:prompt], add_bos: true)
+
+    return unless embd_input.size.positive?
+
+    context.eval(tokens: embd_input, n_past: 0, n_threads: options[:threads])
+
+    context.embeddings.each { |val| print("#{val} ") }
+    print("\n")
+  end
+end
+
+Embedding.start(ARGV)
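Since `Context#embeddings` returns a plain array of floats (as the script above demonstrates), a common follow-up is comparing prompts by cosine similarity. A minimal sketch reusing the same API surface as embedding.rb; the `embed` helper, model path, and prompts are illustrative assumptions, not part of the gem:

```ruby
require 'llama_cpp'

# One embedding-enabled context, reused across prompts.
params = LLaMACpp::ContextParams.new
params.embedding = true
context = LLaMACpp::Context.new(model_path: '/path/to/quantized-model.bin', params: params)

# embed wraps the tokenize/eval/embeddings sequence from embedding.rb above;
# n_threads: 2 mirrors the script's default.
embed = lambda do |text|
  tokens = context.tokenize(text: text, add_bos: true)
  context.eval(tokens: tokens, n_past: 0, n_threads: 2)
  context.embeddings
end

# Plain cosine similarity over two equal-length Float arrays.
def cosine_similarity(a, b)
  dot = a.zip(b).sum { |x, y| x * y }
  dot / (Math.sqrt(a.sum { |x| x * x }) * Math.sqrt(b.sum { |x| x * x }))
end

puts cosine_similarity(embed.call('Hello, World.'), embed.call('Goodbye, World.'))
```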