gpt_neox_client 0.1.0
- checksums.yaml +7 -0
- data/CHANGELOG.md +5 -0
- data/CODE_OF_CONDUCT.md +84 -0
- data/LICENSE.txt +21 -0
- data/README.md +68 -0
- data/ext/gpt_neox_client/extconf.rb +25 -0
- data/ext/gpt_neox_client/gpt_neox_client.cpp +316 -0
- data/ext/gpt_neox_client/gpt_neox_client.h +10 -0
- data/ext/gpt_neox_client/src/LICENSE +21 -0
- data/ext/gpt_neox_client/src/common-ggml.cpp +246 -0
- data/ext/gpt_neox_client/src/common-ggml.h +18 -0
- data/ext/gpt_neox_client/src/common.cpp +809 -0
- data/ext/gpt_neox_client/src/common.h +176 -0
- data/ext/gpt_neox_client/src/dr_wav.h +6434 -0
- data/ext/gpt_neox_client/src/ggml/ggml-alloc.c +594 -0
- data/ext/gpt_neox_client/src/ggml/ggml-alloc.h +26 -0
- data/ext/gpt_neox_client/src/ggml/ggml-cuda.cu +6756 -0
- data/ext/gpt_neox_client/src/ggml/ggml-cuda.h +46 -0
- data/ext/gpt_neox_client/src/ggml/ggml-metal.h +85 -0
- data/ext/gpt_neox_client/src/ggml/ggml-metal.m +1195 -0
- data/ext/gpt_neox_client/src/ggml/ggml-metal.metal +2049 -0
- data/ext/gpt_neox_client/src/ggml/ggml-opencl.cpp +1865 -0
- data/ext/gpt_neox_client/src/ggml/ggml-opencl.h +25 -0
- data/ext/gpt_neox_client/src/ggml/ggml.c +20632 -0
- data/ext/gpt_neox_client/src/ggml/ggml.h +1997 -0
- data/ext/gpt_neox_client/src/main.cpp +814 -0
- data/lib/gpt_neox_client/version.rb +7 -0
- data/lib/gpt_neox_client.rb +4 -0
- metadata +75 -0
data/ext/gpt_neox_client/src/common.h
@@ -0,0 +1,176 @@
// Various helper functions and utilities

#pragma once

#include <string>
#include <map>
#include <vector>
#include <random>
#include <thread>

#define COMMON_SAMPLE_RATE 16000

//
// GPT CLI argument parsing
//

struct gpt_params {
    int32_t seed      = -1;  // RNG seed
    int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
    int32_t n_predict = 200; // new tokens to predict
    int32_t n_batch   = 8;   // batch size for prompt processing

    // sampling parameters
    int32_t top_k          = 40;
    float   top_p          = 0.9f;
    float   temp           = 0.9f;
    int32_t repeat_last_n  = 64;
    float   repeat_penalty = 1.00f;

    std::string model      = "models/gpt-2-117M/ggml-model.bin"; // model path
    std::string prompt     = "";
    std::string token_test = "";

    bool    interactive      = false;
    int32_t interactive_port = -1;

    int32_t n_gpu_layers     = 0;
};

bool gpt_params_parse(int argc, char ** argv, gpt_params & params);

void gpt_print_usage(int argc, char ** argv, const gpt_params & params);

std::string gpt_random_prompt(std::mt19937 & rng);

//
// Vocab utils
//

std::string trim(const std::string & s);

std::string replace(
        const std::string & s,
        const std::string & from,
        const std::string & to);

struct gpt_vocab {
    using id    = int32_t;
    using token = std::string;

    std::map<token, id> token_to_id;
    std::map<id, token> id_to_token;
    std::vector<std::string> special_tokens;

    void add_special_token(const std::string & token);
};

// poor-man's JSON parsing
std::map<std::string, int32_t> json_parse(const std::string & fname);

std::string convert_to_utf8(const std::wstring & input);

std::wstring convert_to_wstring(const std::string & input);

void gpt_split_words(std::string str, std::vector<std::string>& words);

// split text into tokens
//
// ref: https://github.com/openai/gpt-2/blob/a74da5d99abaaba920de8131d64da2862a8f213b/src/encoder.py#L53
//
// Regex (Python):
// r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+"""
//
// Regex (C++):
// R"('s|'t|'re|'ve|'m|'ll|'d| ?[[:alpha:]]+| ?[[:digit:]]+| ?[^\s[:alpha:][:digit:]]+|\s+(?!\S)|\s+)"
//
std::vector<gpt_vocab::id> gpt_tokenize(const gpt_vocab & vocab, const std::string & text);

// test outputs of gpt_tokenize
//
//   - compare with tokens generated by the huggingface tokenizer
//   - test cases are chosen based on the model's main language (under 'prompt' directory)
//   - if all sentences are tokenized identically, print 'All tests passed.'
//   - otherwise, print sentence, huggingface tokens, ggml tokens
//
void test_gpt_tokenizer(gpt_vocab & vocab, const std::string & fpath_test);

// load the tokens from encoder.json
bool gpt_vocab_init(const std::string & fname, gpt_vocab & vocab);

// sample next token given probabilities for each embedding
//
//   - consider only the top K tokens
//   - from them, consider only the top tokens with cumulative probability > P
//
// TODO: not sure if this implementation is correct
// TODO: temperature is not implemented
//
gpt_vocab::id gpt_sample_top_k_top_p(
        const gpt_vocab & vocab,
        const float * logits,
        int    top_k,
        double top_p,
        double temp,
        std::mt19937 & rng);

gpt_vocab::id gpt_sample_top_k_top_p_repeat(
        const gpt_vocab & vocab,
        const float * logits,
        const int32_t * last_n_tokens_data,
        size_t last_n_tokens_data_size,
        int    top_k,
        double top_p,
        double temp,
        int    repeat_last_n,
        float  repeat_penalty,
        std::mt19937 & rng);

//
// Audio utils
//

// Read WAV audio file and store the PCM data into pcmf32
// The sample rate of the audio must be equal to COMMON_SAMPLE_RATE
// If stereo flag is set and the audio has 2 channels, the pcmf32s will contain 2 channel PCM
bool read_wav(
        const std::string & fname,
        std::vector<float> & pcmf32,
        std::vector<std::vector<float>> & pcmf32s,
        bool stereo);

// Apply a high-pass frequency filter to PCM audio
// Suppresses frequencies below cutoff Hz
void high_pass_filter(
        std::vector<float> & data,
        float cutoff,
        float sample_rate);

// Basic voice activity detection (VAD) using audio energy adaptive threshold
bool vad_simple(
        std::vector<float> & pcmf32,
        int   sample_rate,
        int   last_ms,
        float vad_thold,
        float freq_thold,
        bool  verbose);

// compute similarity between two strings using Levenshtein distance
float similarity(const std::string & s0, const std::string & s1);

//
// SAM argument parsing
//

struct sam_params {
    int32_t seed      = -1; // RNG seed
    int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());

    std::string model     = "models/sam-vit-b/ggml-model-f16.bin"; // model path
    std::string fname_inp = "img.jpg";
    std::string fname_out = "img.out";
};

bool sam_params_parse(int argc, char ** argv, sam_params & params);

void sam_print_usage(int argc, char ** argv, const sam_params & params);
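
For context, the header above declares the helper API that the bundled ggml example code builds on: CLI argument parsing, vocabulary loading, GPT-2-style tokenization, and top-k/top-p sampling. The following is a minimal, illustrative sketch (not part of the gem) of how those declared helpers are typically wired together. The encoder.json path and the zero-filled logits buffer are placeholders; in the real examples the logits come from a forward pass of the GPT-NeoX model implemented in main.cpp.

// Hypothetical usage sketch for the helpers declared in common.h.
// Assumes the logits are produced elsewhere (e.g. by the model's forward pass).
#include "common.h"

#include <cstdio>
#include <random>
#include <vector>

int main(int argc, char ** argv) {
    gpt_params params;
    if (!gpt_params_parse(argc, argv, params)) {
        gpt_print_usage(argc, argv, params);
        return 1;
    }

    std::mt19937 rng(params.seed >= 0 ? (uint32_t) params.seed
                                      : (uint32_t) std::random_device{}());
    if (params.prompt.empty()) {
        params.prompt = gpt_random_prompt(rng);
    }

    // Load the token <-> id maps; the path here is illustrative.
    gpt_vocab vocab;
    if (!gpt_vocab_init("models/gpt-2-117M/encoder.json", vocab)) {
        return 1;
    }

    // Tokenize the prompt with the GPT-2-style regex tokenizer.
    std::vector<gpt_vocab::id> tokens = gpt_tokenize(vocab, params.prompt);

    // Placeholder logits buffer; a real run would fill this from the model.
    std::vector<float>   logits(vocab.id_to_token.size(), 0.0f);
    std::vector<int32_t> last_n(tokens.begin(), tokens.end());

    // Sample one token with top-k/top-p filtering and repetition penalty.
    gpt_vocab::id next = gpt_sample_top_k_top_p_repeat(
            vocab, logits.data(),
            last_n.data(), last_n.size(),
            params.top_k, params.top_p, params.temp,
            params.repeat_last_n, params.repeat_penalty, rng);

    printf("next token id: %d\n", next);
    return 0;
}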