llama_cpp 0.0.1
- checksums.yaml +7 -0
- data/CHANGELOG.md +5 -0
- data/CODE_OF_CONDUCT.md +84 -0
- data/LICENSE.txt +21 -0
- data/README.md +47 -0
- data/ext/llama_cpp/extconf.rb +13 -0
- data/ext/llama_cpp/llama_cpp.cpp +500 -0
- data/ext/llama_cpp/llama_cpp.h +12 -0
- data/ext/llama_cpp/src/LICENSE +21 -0
- data/ext/llama_cpp/src/ggml.c +10339 -0
- data/ext/llama_cpp/src/ggml.h +773 -0
- data/ext/llama_cpp/src/llama.cpp +1864 -0
- data/ext/llama_cpp/src/llama.h +152 -0
- data/lib/llama_cpp/version.rb +10 -0
- data/lib/llama_cpp.rb +72 -0
- metadata +63 -0
data/ext/llama_cpp/src/llama.h
ADDED
@@ -0,0 +1,152 @@
#ifndef LLAMA_H
#define LLAMA_H

#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>

#ifdef LLAMA_SHARED
#    ifdef _WIN32
#        ifdef LLAMA_BUILD
#            define LLAMA_API __declspec(dllexport)
#        else
#            define LLAMA_API __declspec(dllimport)
#        endif
#    else
#        define LLAMA_API __attribute__ ((visibility ("default")))
#    endif
#else
#    define LLAMA_API
#endif

#define LLAMA_FILE_VERSION 1
#define LLAMA_FILE_MAGIC 0x67676d66 // 'ggmf' in hex
#define LLAMA_FILE_MAGIC_UNVERSIONED 0x67676d6c // pre-versioned files

#ifdef __cplusplus
extern "C" {
#endif

    //
    // C interface
    //
    // TODO: show sample usage
    //

    struct llama_context;

    typedef int llama_token;

    typedef struct llama_token_data {
        llama_token id;  // token id

        float p;     // probability of the token
        float plog;  // log probability of the token

    } llama_token_data;

    typedef void (*llama_progress_callback)(float progress, void *ctx);

    struct llama_context_params {
        int n_ctx;   // text context
        int n_parts; // -1 for default
        int seed;    // RNG seed, 0 for random

        bool f16_kv;     // use fp16 for KV cache
        bool logits_all; // the llama_eval() call computes all logits, not just the last one
        bool vocab_only; // only load the vocabulary, no weights
        bool use_mlock;  // force system to keep model in RAM
        bool embedding;  // embedding mode only

        // called with a progress value between 0 and 1, pass NULL to disable
        llama_progress_callback progress_callback;
        // context pointer passed to the progress callback
        void * progress_callback_user_data;
    };

    LLAMA_API struct llama_context_params llama_context_default_params();

    // Various functions for loading a ggml llama model.
    // Allocate (almost) all memory needed for the model.
    // Return NULL on failure
    LLAMA_API struct llama_context * llama_init_from_file(
                             const char * path_model,
            struct llama_context_params   params);

    // Frees all allocated memory
    LLAMA_API void llama_free(struct llama_context * ctx);

    // TODO: not great API - very likely to change
    // Returns 0 on success
    LLAMA_API int llama_model_quantize(
            const char * fname_inp,
            const char * fname_out,
                   int   itype);

    // Run the llama inference to obtain the logits and probabilities for the next token.
    // tokens + n_tokens is the provided batch of new tokens to process
    // n_past is the number of tokens to use from previous eval calls
    // Returns 0 on success
    LLAMA_API int llama_eval(
            struct llama_context * ctx,
               const llama_token * tokens,
                             int   n_tokens,
                             int   n_past,
                             int   n_threads);

    // Convert the provided text into tokens.
    // The tokens pointer must be large enough to hold the resulting tokens.
    // Returns the number of tokens on success, no more than n_max_tokens
    // Returns a negative number on failure - the number of tokens that would have been returned
    // TODO: not sure if correct
    LLAMA_API int llama_tokenize(
            struct llama_context * ctx,
                      const char * text,
                     llama_token * tokens,
                             int   n_max_tokens,
                            bool   add_bos);

    LLAMA_API int llama_n_vocab(struct llama_context * ctx);
    LLAMA_API int llama_n_ctx  (struct llama_context * ctx);
    LLAMA_API int llama_n_embd (struct llama_context * ctx);

    // Token logits obtained from the last call to llama_eval()
    // The logits for the last token are stored in the last row
    // Can be mutated in order to change the probabilities of the next token
    // Rows: n_tokens
    // Cols: n_vocab
    LLAMA_API float * llama_get_logits(struct llama_context * ctx);

    // Get the embeddings for the input
    // shape: [n_embd] (1-dimensional)
    LLAMA_API float * llama_get_embeddings(struct llama_context * ctx);

    // Token Id -> String. Uses the vocabulary in the provided context
    LLAMA_API const char * llama_token_to_str(struct llama_context * ctx, llama_token token);

    // Special tokens
    LLAMA_API llama_token llama_token_bos();
    LLAMA_API llama_token llama_token_eos();

    // TODO: improve the last_n_tokens interface ?
    LLAMA_API llama_token llama_sample_top_p_top_k(
            struct llama_context * ctx,
               const llama_token * last_n_tokens_data,
                             int   last_n_tokens_size,
                             int   top_k,
                           float   top_p,
                           float   temp,
                           float   repeat_penalty);

    // Performance information
    LLAMA_API void llama_print_timings(struct llama_context * ctx);
    LLAMA_API void llama_reset_timings(struct llama_context * ctx);

    // Print system information
    LLAMA_API const char * llama_print_system_info(void);

#ifdef __cplusplus
}
#endif

#endif
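The header's own TODO notes that sample usage is still missing, so a minimal sketch of the intended call sequence may help: load a model, tokenize a prompt, evaluate it as one batch, then sample a token. This is an illustration written against the declarations above, not code shipped in the gem; the model path, buffer size, and thread count are placeholder assumptions, and error handling is kept to a minimum.

// sketch.c - minimal use of the llama.h API above (illustrative only)
#include <stdio.h>
#include "llama.h"

int main(void) {
    struct llama_context_params params = llama_context_default_params();

    // "models/7B/ggml-model-q4_0.bin" is a placeholder path, not part of the gem.
    struct llama_context * ctx = llama_init_from_file("models/7B/ggml-model-q4_0.bin", params);
    if (ctx == NULL) {
        return 1;
    }

    // Tokenize the prompt; a negative return value signals failure.
    llama_token tokens[64];
    const int n_tokens = llama_tokenize(ctx, " Hello", tokens, 64, true);
    if (n_tokens < 0) {
        llama_free(ctx);
        return 1;
    }

    // Evaluate the whole prompt in one batch (n_past = 0, 4 threads assumed).
    if (llama_eval(ctx, tokens, n_tokens, 0, 4) != 0) {
        llama_free(ctx);
        return 1;
    }

    // Sample the next token with the same defaults the Ruby binding uses below.
    const llama_token id = llama_sample_top_p_top_k(ctx, tokens, n_tokens, 40, 0.95f, 0.80f, 1.10f);
    printf("next token: %s\n", llama_token_to_str(ctx, id));

    llama_print_timings(ctx);
    llama_free(ctx);
    return 0;
}

Passing the prompt tokens themselves as last_n_tokens_data is a simplification; a real generation loop maintains a rolling window of recent tokens for the repeat penalty, as the Ruby generate method below does.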
data/lib/llama_cpp/version.rb
ADDED
@@ -0,0 +1,10 @@
# frozen_string_literal: true

# llama_cpp.rb provides Ruby bindings for the llama.cpp.
module LLaMACpp
  # The version of llama_cpp.rb you install.
  VERSION = '0.0.1'

  # The version of llama.cpp bundled with llama_cpp.rb.
  LLAMA_CPP_VERSION = 'master-2a98bc1'
end
data/lib/llama_cpp.rb
ADDED
@@ -0,0 +1,72 @@
# frozen_string_literal: true

require_relative 'llama_cpp/version'
require_relative 'llama_cpp/llama_cpp'

# llama_cpp.rb provides Ruby bindings for the llama.cpp.
module LLaMACpp
  module_function

  # Generates sentences following the given prompt for operation check.
  #
  # @param context [LLaMACpp::Context]
  # @param prompt [String]
  # @return [String]
  def generate(context, prompt) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/PerceivedComplexity
    prompt.insert(0, ' ')

    embd_input = context.tokenize(text: prompt, add_bos: true)

    n_ctx = context.n_ctx
    last_n_tokens = [0] * n_ctx

    embd = []
    n_consumed = 0
    n_keep = 10
    n_past = 0
    n_remain = 128
    repeat_last_n = 64
    output = []

    while n_remain != 0
      unless embd.empty?
        if n_past + embd.size > n_ctx
          n_left = n_past - n_keep
          n_past = n_keep
          embd.insert(0, last_n_tokens[(n_ctx - (n_left / 2) - embd.size)...-embd.size])
        end

        context.eval(tokens: embd, n_past: n_past)
      end

      n_past += embd.size
      embd.clear

      if embd_input.size <= n_consumed
        start = n_ctx - repeat_last_n
        id = context.sample_top_p_top_k(
          last_n_tokens[start...(start + repeat_last_n)], top_k: 40, top_p: 0.95, temp: 0.80, penalty: 1.1
        )
        last_n_tokens.shift
        last_n_tokens.push(id)

        embd.push(id)
        n_remain -= 1
      else
        while embd_input.size > n_consumed
          embd.push(embd_input[n_consumed])
          last_n_tokens.shift
          last_n_tokens.push(embd_input[n_consumed])
          n_consumed += 1
          break if embd.size >= 512
        end
      end

      embd.each { |token| output << context.token_to_str(token) }

      break if embd[-1] == LLaMACpp.token_eos
    end

    output.join.delete_prefix(prompt).strip
  end
end
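The least obvious step in generate is the context-window swap: once n_past plus the pending batch would exceed n_ctx, it rewinds n_past to n_keep and prepends half of the evicted history (taken from last_n_tokens) to the batch, so the model keeps some recent context after the overflow. Below is the same arithmetic sketched in C terms against the header API above; swap_context and all of its parameters are hypothetical names used for illustration and are not part of the gem.

#include <string.h>
#include "llama.h"

// Hypothetical helper mirroring the swap step in LLaMACpp.generate.
// embd must have room for n_embd + (n_past - n_keep) / 2 tokens;
// last_n_tokens holds the most recent n_ctx token ids.
static void swap_context(llama_token * embd, int * n_embd, int * n_past,
                         const llama_token * last_n_tokens,
                         int n_ctx, int n_keep) {
    if (*n_past + *n_embd <= n_ctx) {
        return; // still fits in the context window
    }

    const int n_left = *n_past - n_keep;
    const int n_prev = n_left / 2; // re-feed half of the evicted history
    *n_past = n_keep;

    // Mirrors the Ruby slice last_n_tokens[(n_ctx - (n_left / 2) - embd.size)...-embd.size]:
    // shift the pending batch right, then copy the recycled tokens in front of it.
    memmove(embd + n_prev, embd, *n_embd * sizeof(llama_token));
    memcpy(embd, last_n_tokens + n_ctx - n_prev - *n_embd, n_prev * sizeof(llama_token));
    *n_embd += n_prev;
}

After the swap, the enlarged batch is evaluated with the rewound n_past, exactly as generate calls context.eval immediately after this branch.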
metadata
ADDED
@@ -0,0 +1,63 @@
--- !ruby/object:Gem::Specification
name: llama_cpp
version: !ruby/object:Gem::Version
  version: 0.0.1
platform: ruby
authors:
- yoshoku
autorequire:
bindir: exe
cert_chain: []
date: 2023-04-02 00:00:00.000000000 Z
dependencies: []
description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
email:
- yoshoku@outlook.com
executables: []
extensions:
- ext/llama_cpp/extconf.rb
extra_rdoc_files: []
files:
- CHANGELOG.md
- CODE_OF_CONDUCT.md
- LICENSE.txt
- README.md
- ext/llama_cpp/extconf.rb
- ext/llama_cpp/llama_cpp.cpp
- ext/llama_cpp/llama_cpp.h
- ext/llama_cpp/src/LICENSE
- ext/llama_cpp/src/ggml.c
- ext/llama_cpp/src/ggml.h
- ext/llama_cpp/src/llama.cpp
- ext/llama_cpp/src/llama.h
- lib/llama_cpp.rb
- lib/llama_cpp/version.rb
homepage: https://github.com/yoshoku/llama_cpp.rb
licenses:
- MIT
metadata:
  homepage_uri: https://github.com/yoshoku/llama_cpp.rb
  source_code_uri: https://github.com/yoshoku/llama_cpp.rb
  changelog_uri: https://github.com/yoshoku/llama_cpp.rb/blob/main/CHANGELOG.md
  documentation_uri: https://yoshoku.github.io/llama_cpp.rb/doc/
  rubygems_mfa_required: 'true'
post_install_message:
rdoc_options: []
require_paths:
- lib
required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: '0'
required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: '0'
requirements: []
rubygems_version: 3.3.26
signing_key:
specification_version: 4
summary: Ruby bindings for the llama.cpp.
test_files: []