llama_cpp 0.0.1

data/ext/llama_cpp/src/llama.h ADDED
@@ -0,0 +1,152 @@
+ #ifndef LLAMA_H
+ #define LLAMA_H
+
+ #include <stddef.h>
+ #include <stdint.h>
+ #include <stdbool.h>
+
+ #ifdef LLAMA_SHARED
+ #  ifdef _WIN32
+ #    ifdef LLAMA_BUILD
+ #      define LLAMA_API __declspec(dllexport)
+ #    else
+ #      define LLAMA_API __declspec(dllimport)
+ #    endif
+ #  else
+ #    define LLAMA_API __attribute__ ((visibility ("default")))
+ #  endif
+ #else
+ #  define LLAMA_API
+ #endif
+
+ #define LLAMA_FILE_VERSION 1
+ #define LLAMA_FILE_MAGIC 0x67676d66 // 'ggmf' in hex
+ #define LLAMA_FILE_MAGIC_UNVERSIONED 0x67676d6c // pre-versioned files
+
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+
+ //
+ // C interface
+ //
+ // TODO: show sample usage
+ //
+
+ struct llama_context;
+
+ typedef int llama_token;
+
+ typedef struct llama_token_data {
+     llama_token id;  // token id
+
+     float p;     // probability of the token
+     float plog;  // log probability of the token
+
+ } llama_token_data;
+
+ typedef void (*llama_progress_callback)(float progress, void *ctx);
+
+ struct llama_context_params {
+     int n_ctx;   // text context
+     int n_parts; // -1 for default
+     int seed;    // RNG seed, 0 for random
+
+     bool f16_kv;     // use fp16 for KV cache
+     bool logits_all; // the llama_eval() call computes all logits, not just the last one
+     bool vocab_only; // only load the vocabulary, no weights
+     bool use_mlock;  // force system to keep model in RAM
+     bool embedding;  // embedding mode only
+
+     // called with a progress value between 0 and 1, pass NULL to disable
+     llama_progress_callback progress_callback;
+     // context pointer passed to the progress callback
+     void * progress_callback_user_data;
+ };
+
+ LLAMA_API struct llama_context_params llama_context_default_params();
+
+ // Various functions for loading a ggml llama model.
+ // Allocate (almost) all memory needed for the model.
+ // Return NULL on failure
+ LLAMA_API struct llama_context * llama_init_from_file(
+         const char * path_model,
+         struct llama_context_params params);
+
+ // Frees all allocated memory
+ LLAMA_API void llama_free(struct llama_context * ctx);
+
+ // TODO: not great API - very likely to change
+ // Returns 0 on success
+ LLAMA_API int llama_model_quantize(
+         const char * fname_inp,
+         const char * fname_out,
+         int itype);
+
+ // Run the llama inference to obtain the logits and probabilities for the next token.
+ // tokens + n_tokens is the provided batch of new tokens to process
+ // n_past is the number of tokens to use from previous eval calls
+ // Returns 0 on success
+ LLAMA_API int llama_eval(
+         struct llama_context * ctx,
+         const llama_token * tokens,
+         int n_tokens,
+         int n_past,
+         int n_threads);
+
+ // Convert the provided text into tokens.
+ // The tokens pointer must be large enough to hold the resulting tokens.
+ // Returns the number of tokens on success, no more than n_max_tokens
+ // Returns a negative number on failure - the number of tokens that would have been returned
+ // TODO: not sure if correct
+ LLAMA_API int llama_tokenize(
+         struct llama_context * ctx,
+         const char * text,
+         llama_token * tokens,
+         int n_max_tokens,
+         bool add_bos);
+
+ LLAMA_API int llama_n_vocab(struct llama_context * ctx);
+ LLAMA_API int llama_n_ctx  (struct llama_context * ctx);
+ LLAMA_API int llama_n_embd (struct llama_context * ctx);
+
+ // Token logits obtained from the last call to llama_eval()
+ // The logits for the last token are stored in the last row
+ // Can be mutated in order to change the probabilities of the next token
+ // Rows: n_tokens
+ // Cols: n_vocab
+ LLAMA_API float * llama_get_logits(struct llama_context * ctx);
+
+ // Get the embeddings for the input
+ // shape: [n_embd] (1-dimensional)
+ LLAMA_API float * llama_get_embeddings(struct llama_context * ctx);
+
+ // Token Id -> String. Uses the vocabulary in the provided context
+ LLAMA_API const char * llama_token_to_str(struct llama_context * ctx, llama_token token);
+
+ // Special tokens
+ LLAMA_API llama_token llama_token_bos();
+ LLAMA_API llama_token llama_token_eos();
+
+ // TODO: improve the last_n_tokens interface ?
+ LLAMA_API llama_token llama_sample_top_p_top_k(
+         struct llama_context * ctx,
+         const llama_token * last_n_tokens_data,
+         int last_n_tokens_size,
+         int top_k,
+         float top_p,
+         float temp,
+         float repeat_penalty);
+
+ // Performance information
+ LLAMA_API void llama_print_timings(struct llama_context * ctx);
+ LLAMA_API void llama_reset_timings(struct llama_context * ctx);
+
+ // Print system information
+ LLAMA_API const char * llama_print_system_info(void);
+
+ #ifdef __cplusplus
+ }
+ #endif
+
+ #endif
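
The header above is the surface that the gem's native extension (ext/llama_cpp/llama_cpp.cpp, not included in this diff) wraps for Ruby. As a rough sketch of how the C flow maps onto the binding — tokenize, eval, sample, then convert the sampled token back to text — assuming a LLaMACpp::Context has already been constructed elsewhere:

# Minimal sketch of one tokenize -> eval -> sample round trip through the
# Ruby binding, mirroring llama_tokenize, llama_eval, llama_sample_top_p_top_k
# and llama_token_to_str above. `context` is assumed to be an initialized
# LLaMACpp::Context; its constructor lives in the native extension.
def next_piece(context, prompt)
  tokens = context.tokenize(text: prompt, add_bos: true)
  context.eval(tokens: tokens, n_past: 0)
  # Use the prompt tokens as the repeat-penalty window, with the same sampling
  # defaults as LLaMACpp.generate further down in this diff.
  id = context.sample_top_p_top_k(tokens, top_k: 40, top_p: 0.95, temp: 0.80, penalty: 1.1)
  context.token_to_str(id)
end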
data/lib/llama_cpp/version.rb ADDED
@@ -0,0 +1,10 @@
+ # frozen_string_literal: true
+
+ # llama_cpp.rb provides Ruby bindings for llama.cpp.
+ module LLaMACpp
+   # The version of llama_cpp.rb you install.
+   VERSION = '0.0.1'
+
+   # The version of llama.cpp bundled with llama_cpp.rb.
+   LLAMA_CPP_VERSION = 'master-2a98bc1'
+ end
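
Both constants are plain Ruby values, so they can be read at runtime, for example to report which upstream llama.cpp snapshot the installed gem bundles:

require 'llama_cpp'

# Prints the gem version and the bundled llama.cpp revision defined above.
puts "llama_cpp.rb #{LLaMACpp::VERSION} (llama.cpp #{LLaMACpp::LLAMA_CPP_VERSION})"
# => llama_cpp.rb 0.0.1 (llama.cpp master-2a98bc1)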
data/lib/llama_cpp.rb ADDED
@@ -0,0 +1,72 @@
+ # frozen_string_literal: true
+
+ require_relative 'llama_cpp/version'
+ require_relative 'llama_cpp/llama_cpp'
+
+ # llama_cpp.rb provides Ruby bindings for llama.cpp.
+ module LLaMACpp
+   module_function
+
+   # Generates sentences following the given prompt for operation check.
+   #
+   # @param context [LLaMACpp::Context]
+   # @param prompt [String]
+   # @return [String]
+   def generate(context, prompt) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/PerceivedComplexity
+     prompt.insert(0, ' ')
+
+     embd_input = context.tokenize(text: prompt, add_bos: true)
+
+     n_ctx = context.n_ctx
+     last_n_tokens = [0] * n_ctx
+
+     embd = []
+     n_consumed = 0
+     n_keep = 10
+     n_past = 0
+     n_remain = 128
+     repeat_last_n = 64
+     output = []
+
+     while n_remain != 0
+       unless embd.empty?
+         if n_past + embd.size > n_ctx
+           n_left = n_past - n_keep
+           n_past = n_keep
+           embd.insert(0, *last_n_tokens[(n_ctx - (n_left / 2) - embd.size)...-embd.size])
+         end
+
+         context.eval(tokens: embd, n_past: n_past)
+       end
+
+       n_past += embd.size
+       embd.clear
+
+       if embd_input.size <= n_consumed
+         start = n_ctx - repeat_last_n
+         id = context.sample_top_p_top_k(
+           last_n_tokens[start...(start + repeat_last_n)], top_k: 40, top_p: 0.95, temp: 0.80, penalty: 1.1
+         )
+         last_n_tokens.shift
+         last_n_tokens.push(id)
+
+         embd.push(id)
+         n_remain -= 1
+       else
+         while embd_input.size > n_consumed
+           embd.push(embd_input[n_consumed])
+           last_n_tokens.shift
+           last_n_tokens.push(embd_input[n_consumed])
+           n_consumed += 1
+           break if embd.size >= 512
+         end
+       end
+
+       embd.each { |token| output << context.token_to_str(token) }
+
+       break if embd[-1] == LLaMACpp.token_eos
+     end
+
+     output.join.delete_prefix(prompt).strip
+   end
+ end
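
For orientation, a hedged usage sketch of the module above. The LLaMACpp::ContextParams / LLaMACpp::Context constructor names and the seed= setter are assumptions for illustration — the real interface is defined in ext/llama_cpp/llama_cpp.cpp, which this diff does not show — and the model path is a placeholder:

require 'llama_cpp'

# Assumed constructor and setter names (hypothetical here); check
# ext/llama_cpp/llama_cpp.cpp for the interface the extension actually exposes.
params = LLaMACpp::ContextParams.new
params.seed = 123
context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)

# LLaMACpp.generate is defined above: it tokenizes the prompt, evaluates it,
# and samples up to 128 new tokens with top-k/top-p sampling.
puts LLaMACpp.generate(context, 'Hello, my name is')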
metadata ADDED
@@ -0,0 +1,63 @@
+ --- !ruby/object:Gem::Specification
+ name: llama_cpp
+ version: !ruby/object:Gem::Version
+   version: 0.0.1
+ platform: ruby
+ authors:
+ - yoshoku
+ autorequire:
+ bindir: exe
+ cert_chain: []
+ date: 2023-04-02 00:00:00.000000000 Z
+ dependencies: []
+ description: llama_cpp.rb provides Ruby bindings for llama.cpp.
+ email:
+ - yoshoku@outlook.com
+ executables: []
+ extensions:
+ - ext/llama_cpp/extconf.rb
+ extra_rdoc_files: []
+ files:
+ - CHANGELOG.md
+ - CODE_OF_CONDUCT.md
+ - LICENSE.txt
+ - README.md
+ - ext/llama_cpp/extconf.rb
+ - ext/llama_cpp/llama_cpp.cpp
+ - ext/llama_cpp/llama_cpp.h
+ - ext/llama_cpp/src/LICENSE
+ - ext/llama_cpp/src/ggml.c
+ - ext/llama_cpp/src/ggml.h
+ - ext/llama_cpp/src/llama.cpp
+ - ext/llama_cpp/src/llama.h
+ - lib/llama_cpp.rb
+ - lib/llama_cpp/version.rb
+ homepage: https://github.com/yoshoku/llama_cpp.rb
+ licenses:
+ - MIT
+ metadata:
+   homepage_uri: https://github.com/yoshoku/llama_cpp.rb
+   source_code_uri: https://github.com/yoshoku/llama_cpp.rb
+   changelog_uri: https://github.com/yoshoku/llama_cpp.rb/blob/main/CHANGELOG.md
+   documentation_uri: https://yoshoku.github.io/llama_cpp.rb/doc/
+   rubygems_mfa_required: 'true'
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubygems_version: 3.3.26
+ signing_key:
+ specification_version: 4
+ summary: Ruby bindings for llama.cpp.
+ test_files: []
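
To pull in the gem this metadata describes, a typical Gemfile entry looks like the following; installing it builds the native extension through ext/llama_cpp/extconf.rb, as declared in the extensions field above:

# Gemfile
source 'https://rubygems.org'

gem 'llama_cpp', '0.0.1'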