llama_cpp 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/examples/README.md +32 -0
- data/examples/embedding.rb +37 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +553 -313
- data/ext/llama_cpp/src/ggml-metal.h +4 -1
- data/ext/llama_cpp/src/ggml-metal.m +157 -19
- data/ext/llama_cpp/src/ggml-metal.metal +149 -0
- data/ext/llama_cpp/src/ggml-opencl.cpp +493 -4
- data/ext/llama_cpp/src/ggml.c +736 -98
- data/ext/llama_cpp/src/ggml.h +140 -9
- data/ext/llama_cpp/src/llama.cpp +58 -31
- data/ext/llama_cpp/src/llama.h +8 -9
- data/lib/llama_cpp/version.rb +2 -2
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: e5e221d4831be790a990b121e6ac780d10b4cbfb85b2a9b4284d9c216f6e5604
+  data.tar.gz: fba76ac1a70bfd7b02b8d123c57e4c8096a29ac7f658bb090cda91c6a54752d2
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 994029383219077e134d170177954251c20ede6d1c83843ecd22c42eeae83584079d124b41702f55add7f3f237e9bdb14382fbd37dde2d0e74f8cffcfed1715b
+  data.tar.gz: ca4e94b6ddf4e4e9ddabbb2b8309cf4b2b06a881df09fdf4ad96e27c4f1f620ca0024ac46f69d9b474849c074a5c9ba9b0440777a0b52a12413bc356457a02f3
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,7 @@
+## [[0.2.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.2.1...v0.2.2)] - 2023-06-24
+
+- Bump bundled llama.cpp from master-a09f919 to master-7487137.
+
 ## [[0.2.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.2.0...v0.2.1)] - 2023-06-17
 
 - Bump bundled llama.cpp from master-4de0334 to master-a09f919.
data/examples/README.md
CHANGED
@@ -58,3 +58,35 @@ Options:
 
 Start chat
 ```
+
+## embedding.rb
+
+### Usage
+
+```sh
+$ cd examples
+$ gem install llama_cpp thor
+$ ./embedding.rb -m /path/to/quantized-model.bin -t 4 -p 'Hello, World.'
+...
+0.7191136479377747 0.5564611554145813 1.4210394620895386 -1.4874695539474487
+```
+
+### Options
+
+```
+$ ./embedding.rb help main
+Usage:
+  embedding.rb main -m, --model=MODEL -p, --prompt=PROMPT
+
+Options:
+  -s, [--seed=N]            # random seed
+                            # Default: -1
+  -t, [--threads=N]         # number of threads
+                            # Default: 2
+  -m, --model=MODEL         # path to model file
+  -p, --prompt=PROMPT       # prompt to generate embedding
+      [--n-gpu-layers=N]    # number of layers on GPU
+                            # Default: 0
+
+Extract embedding from prompt
+```
data/examples/embedding.rb
ADDED
@@ -0,0 +1,37 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+
+# embedding.rb extracts embedding from prompt.
+# It is created with reference to embedding.cpp in llama.cpp examples:
+# - https://github.com/ggerganov/llama.cpp/blob/master/examples/embedding/embedding.cpp
+
+require 'llama_cpp'
+require 'thor'
+
+class Embedding < Thor # rubocop:disable Style/Documentation
+  default_command :main
+  desc 'main', 'Extract embedding from prompt'
+  option :seed, type: :numeric, aliases: '-s', desc: 'random seed', default: -1
+  option :threads, type: :numeric, aliases: '-t', desc: 'number of threads', default: 2
+  option :model, type: :string, aliases: '-m', desc: 'path to model file', required: true
+  option :prompt, type: :string, aliases: '-p', desc: 'prompt to generate embedding', required: true
+  option :n_gpu_layers, type: :numeric, desc: 'number of layers on GPU', default: 0
+  def main # rubocop:disable Metrics/AbcSize
+    params = LLaMACpp::ContextParams.new
+    params.seed = options[:seed]
+    params.n_gpu_layers = options[:n_gpu_layers]
+    params.embedding = true
+    context = LLaMACpp::Context.new(model_path: options[:model], params: params)
+
+    embd_input = context.tokenize(text: options[:prompt], add_bos: true)
+
+    return unless embd_input.size.positive?
+
+    context.eval(tokens: embd_input, n_past: 0, n_threads: options[:threads])
+
+    context.embeddings.each { |val| print("#{val} ") }
+    print("\n")
+  end
+end
+
+Embedding.start(ARGV)
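
For readers who want the gist of the new example without the Thor CLI wrapper, a minimal sketch of the same flow is shown below. It uses only the gem API calls that appear in the added script (`LLaMACpp::ContextParams`, `LLaMACpp::Context`, `tokenize`, `eval`, `embeddings`); the model path, prompt, seed, and thread count are placeholders to adjust for your environment.

```ruby
# Minimal sketch of the embedding flow from data/examples/embedding.rb,
# with the Thor command-line parsing removed. Paths and values below are
# placeholders, not part of the packaged example.
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
params.seed = 1234        # the example's default of -1 means a random seed
params.n_gpu_layers = 0   # CPU only; raise to offload layers to the GPU
params.embedding = true   # required so the context exposes embeddings

context = LLaMACpp::Context.new(model_path: '/path/to/quantized-model.bin', params: params)

tokens = context.tokenize(text: 'Hello, World.', add_bos: true)
context.eval(tokens: tokens, n_past: 0, n_threads: 4)

puts context.embeddings.join(' ')
```

Apart from dropping the option parsing, this mirrors the packaged script step for step, so against the 0.2.2 gem it should print the same space-separated embedding values shown in the README usage example.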